{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2282fa51",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:15:38.793475Z",
     "start_time": "2025-06-30T18:15:37.976302Z"
    }
   },
   "outputs": [],
   "source": [
    "import re,os,time,collections,ast\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from datetime import datetime\n",
    "from pandas.tseries.offsets import MonthEnd\n",
    "from collections import Counter,defaultdict\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib as mpl\n",
    "from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator, get_single_color_func\n",
    "from itertools import combinations \n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "pd.set_option('display.max_columns', None)\n",
    "pd.set_option('display.width', None)\n",
    "pd.set_option('display.float_format', lambda x: '%.2f' % x)\n",
    "os.chdir('/Volumes/T9/Replicate File-202506/') \n",
    "\n",
    "def count_type(x):\n",
    "    d = defaultdict(int)\n",
    "    for i,j in x.items():\n",
    "        d[keywords_type[i]]+=j      \n",
    "    return dict(d) "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1f3981c5",
   "metadata": {},
   "source": [
    "# Prepare Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1cfd939e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:15:39.513164Z",
     "start_time": "2025-06-30T18:15:39.481924Z"
    }
   },
   "outputs": [],
   "source": [
    "SignalDoc=pd.read_csv('data/SignalDoc.csv') # The file \"SignalDoc.csv\" is provided by Andrew Chen and Tom Zimmermann and is available for download at www.openassetpricing.com. \n",
    "SignalDoc.columns=[x.replace('.','_').replace(' ','_').replace('-','_') for x in SignalDoc.columns]\n",
    "SignalDoc.to_csv('data/SignalDoc2.csv',index=False)  # Standardize the variable names"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c8667981",
   "metadata": {},
   "source": [
    "To extract short-leg and long-leg classifications for each Anomaly–PERMNO–YearMonth combination, run the \"signalbase.sas\" script in SAS. This will generate the dataset \"data/signalbase.sas7bdat\" (>20GB).\n",
    "\n",
    "Since signalbase.sas7bdat is too large for efficient matching in the subsequent steps, we retain only the Anomaly–PERMNO–YearMonth pairs that appear in our Seeking Alpha article sample (\"SA/permno_yymm_leg.csv\") or analyst report sample (\"ins/permno_yymm_leg.csv\")."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a08ae60e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:04:03.642842Z",
     "start_time": "2025-06-30T17:54:33.918597Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "50000000\n",
      "50000000\n",
      "50000000\n",
      "50000000\n",
      "50000000\n",
      "50000000\n",
      "50000000\n",
      "30028336\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>permno</th>\n",
       "      <th>yyyymm</th>\n",
       "      <th>Anomaly</th>\n",
       "      <th>short_leg</th>\n",
       "      <th>long_leg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>350316849</th>\n",
       "      <td>10001.00</td>\n",
       "      <td>200601.00</td>\n",
       "      <td>grcapx</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>350316850</th>\n",
       "      <td>10002.00</td>\n",
       "      <td>200601.00</td>\n",
       "      <td>grcapx</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>350316852</th>\n",
       "      <td>10025.00</td>\n",
       "      <td>200601.00</td>\n",
       "      <td>grcapx</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>350316853</th>\n",
       "      <td>10026.00</td>\n",
       "      <td>200601.00</td>\n",
       "      <td>grcapx</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>350316854</th>\n",
       "      <td>10028.00</td>\n",
       "      <td>200601.00</td>\n",
       "      <td>grcapx</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            permno    yyyymm Anomaly  short_leg  long_leg\n",
       "350316849 10001.00 200601.00  grcapx       0.00      0.00\n",
       "350316850 10002.00 200601.00  grcapx       1.00      0.00\n",
       "350316852 10025.00 200601.00  grcapx       0.00      0.00\n",
       "350316853 10026.00 200601.00  grcapx       0.00      0.00\n",
       "350316854 10028.00 200601.00  grcapx       0.00      0.00"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "save_path1='data/signalbase_table4.csv'\n",
    "save_path2='data/signalbase_2006_2021.csv'\n",
    "chunksize = 50000000\n",
    "i=0\n",
    "for short_long_leg in pd.read_sas('data/signalbase.sas7bdat',encoding='utf-8', chunksize=chunksize):\n",
    "    print(len(short_long_leg))\n",
    "    short_long_leg=short_long_leg[short_long_leg['short_leg'].notnull()]\n",
    "    _df=short_long_leg[(short_long_leg['yyyymm']>=2023)]\n",
    "    if os.path.exists(save_path1):\n",
    "        _df.to_csv(save_path1, mode='a',header=False,index=False)\n",
    "    else:\n",
    "        _df.to_csv(save_path1, index=False) \n",
    "    \n",
    "    short_long_leg=short_long_leg[(short_long_leg['yyyymm']>=200601)&(short_long_leg['yyyymm']<=202112)]\n",
    "    if os.path.exists(save_path2):\n",
    "        short_long_leg.to_csv(save_path2, mode='a',header=False,index=False)\n",
    "    else:\n",
    "        short_long_leg.to_csv(save_path2, index=False) \n",
    "short_long_leg[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7131eb8d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:06:32.492737Z",
     "start_time": "2025-06-30T18:04:25.442688Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "For SeekingAlpha Articles:\n",
      "142811 141746 5793\n",
      "BUYSELL\n",
      "0.00    59155\n",
      "1.00    73061\n",
      "3.00     9530\n",
      "Name: id, dtype: int64\n",
      "For Analyst Reports:\n",
      "1728535 1718353 7009\n",
      "BUYSELL\n",
      "1.00    1032917\n",
      "2.00     625232\n",
      "3.00      60204\n",
      "Name: id, dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>permno</th>\n",
       "      <th>yyyymm</th>\n",
       "      <th>Anomaly</th>\n",
       "      <th>short_leg</th>\n",
       "      <th>long_leg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>55976.00</td>\n",
       "      <td>200601</td>\n",
       "      <td>AM</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>55976.00</td>\n",
       "      <td>200601</td>\n",
       "      <td>AbnormalAccruals</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>55976.00</td>\n",
       "      <td>200601</td>\n",
       "      <td>Accruals</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>55976.00</td>\n",
       "      <td>200601</td>\n",
       "      <td>AdExp</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>55976.00</td>\n",
       "      <td>200601</td>\n",
       "      <td>AnalystRevision</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    permno  yyyymm           Anomaly  short_leg  long_leg\n",
       "0 55976.00  200601                AM       0.00      0.00\n",
       "1 55976.00  200601  AbnormalAccruals       0.00      0.00\n",
       "2 55976.00  200601          Accruals       0.00      0.00\n",
       "3 55976.00  200601             AdExp       0.00      0.00\n",
       "4 55976.00  200601   AnalystRevision       0.00      0.00"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "short_long_leg=pd.read_csv('data/signalbase_2006_2021.csv') \n",
    "print('For SeekingAlpha Articles:')\n",
    "df=pd.read_csv('private/SA/full_freq.csv',usecols=['id','permno','yyyymm','BUYSELL'])\n",
    "print(len(df),df['id'].nunique(),df['permno'].nunique())\n",
    "print(df.groupby('BUYSELL')['id'].nunique())  # BUYSELL=1 for Buy recommendations; BUYSELL=3 for Sell; BUYSELL=2 for Hold.\n",
    "permno_yymm=df[['permno','yyyymm']].drop_duplicates()\n",
    "permno_yymm_leg=pd.merge(permno_yymm,short_long_leg,on=['permno','yyyymm'],how='inner') \n",
    "permno_yymm_leg.to_csv('data/permno_yymm_leg_SA.csv', index=False)  \n",
    "\n",
    "print('For Analyst Reports:')\n",
    "df=pd.read_csv('private/ins/full_freq.csv',usecols=['id','permno','yyyymm','BUYSELL'])\n",
    "print(len(df),df['id'].nunique(),df['permno'].nunique())\n",
    "print(df.groupby('BUYSELL')['id'].nunique())  # BUYSELL=1 for long_ideas; BUYSELL=3 for short_ides; BUYSELL=0 for others.\n",
    "permno_yymm=df[['permno','yyyymm']].drop_duplicates()\n",
    "permno_yymm_leg=pd.merge(permno_yymm,short_long_leg,on=['permno','yyyymm'],how='inner')\n",
    "permno_yymm_leg.to_csv('data/permno_yymm_leg_ins.csv' , index=False)  \n",
    "permno_yymm_leg[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9432efa3",
   "metadata": {},
   "source": [
    "# Tables 2, 3, 5A, 5C, 5E"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5e2bfd28",
   "metadata": {},
   "source": [
    "## Data Preparation for Tables 2, 3, 5A, 5C, 5E, and Online Appendix A4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d0e3ea2c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:15:45.239108Z",
     "start_time": "2025-06-30T18:15:44.991015Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>date</th>\n",
       "      <th>yyyymm</th>\n",
       "      <th>permno</th>\n",
       "      <th>long_ideas</th>\n",
       "      <th>short_ideas</th>\n",
       "      <th>BUYSELL</th>\n",
       "      <th>author.id</th>\n",
       "      <th>word_length</th>\n",
       "      <th>keyword_freq</th>\n",
       "      <th>safe_1</th>\n",
       "      <th>lottery_1</th>\n",
       "      <th>supremacy_1</th>\n",
       "      <th>safe_2</th>\n",
       "      <th>lottery_2</th>\n",
       "      <th>supremacy_2</th>\n",
       "      <th>safe_3</th>\n",
       "      <th>lottery_3</th>\n",
       "      <th>supremacy_3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>142808</th>\n",
       "      <td>4477390</td>\n",
       "      <td>2021-12-31</td>\n",
       "      <td>202112</td>\n",
       "      <td>75905.00</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>105723.00</td>\n",
       "      <td>2412</td>\n",
       "      <td>{'steadily': 1, 'steady': 1, 'potential': 1, '...</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.17</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.08</td>\n",
       "      <td>0.17</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.04</td>\n",
       "      <td>0.04</td>\n",
       "      <td>0.41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>142809</th>\n",
       "      <td>4477433</td>\n",
       "      <td>2021-12-31</td>\n",
       "      <td>202112</td>\n",
       "      <td>14273.00</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>106444.00</td>\n",
       "      <td>2074</td>\n",
       "      <td>{'expanding': 1, 'upside': 6, 'potential': 2, ...</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.48</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.10</td>\n",
       "      <td>1.06</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>142810</th>\n",
       "      <td>4477404</td>\n",
       "      <td>2021-12-31</td>\n",
       "      <td>202112</td>\n",
       "      <td>93096.00</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>105981.00</td>\n",
       "      <td>2358</td>\n",
       "      <td>{'expanding': 2, 'innovative': 1, 'potential':...</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.21</td>\n",
       "      <td>0.85</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.89</td>\n",
       "      <td>0.08</td>\n",
       "      <td>0.21</td>\n",
       "      <td>0.17</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             id        date  yyyymm   permno  long_ideas  short_ideas  \\\n",
       "142808  4477390  2021-12-31  202112 75905.00           1            0   \n",
       "142809  4477433  2021-12-31  202112 14273.00           1            0   \n",
       "142810  4477404  2021-12-31  202112 93096.00           1            0   \n",
       "\n",
       "        BUYSELL  author.id  word_length  \\\n",
       "142808     1.00  105723.00         2412   \n",
       "142809     1.00  106444.00         2074   \n",
       "142810     1.00  105981.00         2358   \n",
       "\n",
       "                                             keyword_freq  safe_1  lottery_1  \\\n",
       "142808  {'steadily': 1, 'steady': 1, 'potential': 1, '...    0.00       0.17   \n",
       "142809  {'expanding': 1, 'upside': 6, 'potential': 2, ...    0.00       0.39   \n",
       "142810  {'expanding': 2, 'innovative': 1, 'potential':...    0.00       0.21   \n",
       "\n",
       "        supremacy_1  safe_2  lottery_2  supremacy_2  safe_3  lottery_3  \\\n",
       "142808         0.37    0.08       0.17         0.33    0.04       0.04   \n",
       "142809         0.48    0.00       0.10         1.06    0.00       0.39   \n",
       "142810         0.85    0.00       0.25         0.89    0.08       0.21   \n",
       "\n",
       "        supremacy_3  \n",
       "142808         0.41  \n",
       "142809         0.10  \n",
       "142810         0.17  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 'safe_1','supremacy_1','lottery_1': Word Frequency for Institutional Investors’ Survey-Based Wordlists.\n",
    "# 'safe_2','supremacy_2','lottery_2': Word Frequency for Retail Investors’ Survey-Based Wordlists.\n",
    "# 'safe_3','supremacy_3','lottery_3': Word Frequency for Self-Defined Wordlists.   \n",
    "pd.read_csv('private/SA/full_freq.csv')[-3:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c39d3e4d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:17:29.568360Z",
     "start_time": "2025-06-30T18:15:48.137362Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SA Sell\n",
      "SA Buy\n",
      "ins Sell\n",
      "ins Buy\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>short_leg</th>\n",
       "      <th>level_0</th>\n",
       "      <th>level_1</th>\n",
       "      <th>description</th>\n",
       "      <th>all_except_short</th>\n",
       "      <th>short_leg</th>\n",
       "      <th>type</th>\n",
       "      <th>k123</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>408</th>\n",
       "      <td>safe_1</td>\n",
       "      <td>count</td>\n",
       "      <td>AM</td>\n",
       "      <td>795727.00</td>\n",
       "      <td>188516.00</td>\n",
       "      <td>n</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>safe_1</td>\n",
       "      <td>mean</td>\n",
       "      <td>AM</td>\n",
       "      <td>0.06</td>\n",
       "      <td>0.06</td>\n",
       "      <td>safe</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>612</th>\n",
       "      <td>supremacy_1</td>\n",
       "      <td>mean</td>\n",
       "      <td>AM</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.42</td>\n",
       "      <td>supremacy</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1020</th>\n",
       "      <td>lottery_1</td>\n",
       "      <td>mean</td>\n",
       "      <td>AM</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.29</td>\n",
       "      <td>lottery</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>safe_1</td>\n",
       "      <td>std</td>\n",
       "      <td>AM</td>\n",
       "      <td>0.12</td>\n",
       "      <td>0.12</td>\n",
       "      <td>safe</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3671</th>\n",
       "      <td>supremacy_3</td>\n",
       "      <td>mean</td>\n",
       "      <td>zerotrade6M</td>\n",
       "      <td>0.15</td>\n",
       "      <td>0.15</td>\n",
       "      <td>supremacy</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4079</th>\n",
       "      <td>lottery_3</td>\n",
       "      <td>mean</td>\n",
       "      <td>zerotrade6M</td>\n",
       "      <td>0.23</td>\n",
       "      <td>0.24</td>\n",
       "      <td>lottery</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3263</th>\n",
       "      <td>safe_3</td>\n",
       "      <td>std</td>\n",
       "      <td>zerotrade6M</td>\n",
       "      <td>0.09</td>\n",
       "      <td>0.08</td>\n",
       "      <td>safe</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3875</th>\n",
       "      <td>supremacy_3</td>\n",
       "      <td>std</td>\n",
       "      <td>zerotrade6M</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.25</td>\n",
       "      <td>supremacy</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4283</th>\n",
       "      <td>lottery_3</td>\n",
       "      <td>std</td>\n",
       "      <td>zerotrade6M</td>\n",
       "      <td>0.28</td>\n",
       "      <td>0.29</td>\n",
       "      <td>lottery</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4284 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "short_leg      level_0 level_1  description  all_except_short  short_leg  \\\n",
       "408             safe_1   count           AM         795727.00  188516.00   \n",
       "0               safe_1    mean           AM              0.06       0.06   \n",
       "612        supremacy_1    mean           AM              0.36       0.42   \n",
       "1020         lottery_1    mean           AM              0.25       0.29   \n",
       "204             safe_1     std           AM              0.12       0.12   \n",
       "...                ...     ...          ...               ...        ...   \n",
       "3671       supremacy_3    mean  zerotrade6M              0.15       0.15   \n",
       "4079         lottery_3    mean  zerotrade6M              0.23       0.24   \n",
       "3263            safe_3     std  zerotrade6M              0.09       0.08   \n",
       "3875       supremacy_3     std  zerotrade6M              0.25       0.25   \n",
       "4283         lottery_3     std  zerotrade6M              0.28       0.29   \n",
       "\n",
       "short_leg       type k123  \n",
       "408                n    1  \n",
       "0               safe    1  \n",
       "612        supremacy    1  \n",
       "1020         lottery    1  \n",
       "204             safe    1  \n",
       "...              ...  ...  \n",
       "3671       supremacy    3  \n",
       "4079         lottery    3  \n",
       "3263            safe    3  \n",
       "3875       supremacy    3  \n",
       "4283         lottery    3  \n",
       "\n",
       "[4284 rows x 7 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keep_vars=['description','all_except_short','short_leg','type','level_1']\n",
    "keep_vars2=['description','all_except_long','long_leg','type','level_1']\n",
    "safe_supremacy_lottery=['safe','supremacy','lottery']\n",
    "safe_supremacy_lottery=sum([[x+kkk for x in safe_supremacy_lottery] for kkk in ['_1','_2','_3']],[])\n",
    "safe=[x for x in safe_supremacy_lottery if x[:5]=='safe_']\n",
    "agg_str={x:['mean','std'] for x in safe_supremacy_lottery}\n",
    "agg_str.update({x:['mean','std','count'] for x in safe})\n",
    "\n",
    "folder='V1/'\n",
    "for SA_ins in ['SA','ins']:\n",
    "    df=pd.read_csv('private/'+SA_ins+'/full_freq.csv',usecols=['id','permno', 'yyyymm','word_length','BUYSELL']+safe_supremacy_lottery)\n",
    "    permno_yymm_leg=pd.read_csv('data/permno_yymm_leg_'+SA_ins+'.csv')\n",
    "    df=pd.merge(df, permno_yymm_leg,on=['permno','yyyymm'],how='inner')\n",
    "\n",
    "    for sample in ['Sell','Buy']: \n",
    "        print(SA_ins,sample)\n",
    "        if sample=='Buy':\n",
    "              df1=df[df['BUYSELL']==1]\n",
    "        if sample=='Sell':\n",
    "              df1=df[df['BUYSELL']==3] \n",
    "        save_root=folder+sample+'/'\n",
    "        if not os.path.exists(save_root):\n",
    "            os.makedirs(save_root)  \n",
    "\n",
    "        df2=df1.groupby(['Anomaly','short_leg']).agg(agg_str).T.stack(level=0).reset_index()\n",
    "        df2[['type','k123']]=df2['level_0'].str.rsplit('_', n=1, expand=True) \n",
    "        df2.loc[df2['level_1']=='count', 'type'] ='n'\n",
    "        df2=df2.sort_values(['k123','Anomaly','level_1']).rename(columns={'Anomaly':'description', 0:'all_except_short',1:'short_leg'})\n",
    "        for k in ['1','2','3']:  \n",
    "            df2[df2['k123']==k][keep_vars].to_csv(save_root+SA_ins.lower()+'_anomaly_'+k+'.csv',index=False)\n",
    "\n",
    "        df3=df1.groupby(['Anomaly','long_leg']).agg(agg_str).T.stack(level=0).reset_index()\n",
    "        df3[['type','k123']]=df3['level_0'].str.rsplit('_', n=1, expand=True) \n",
    "        df3.loc[df3['level_1']=='count', 'type'] ='n'\n",
    "        df3=df3.sort_values(['k123','Anomaly','level_1']).rename(columns={'Anomaly':'description',0:'all_except_long',1:'long_leg'})\n",
    "        for k in ['1','2','3']:  \n",
    "            df3[df3['k123']==k][keep_vars2].to_csv(save_root+SA_ins.lower()+'_anomaly2_'+k+'.csv',index=False)\n",
    "df2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "026b7a87",
   "metadata": {},
   "source": [
    "## Table 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "be545b42",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:17:29.746025Z",
     "start_time": "2025-06-30T18:17:29.570130Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "332\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>Anomaly</th>\n",
       "      <th>description_full</th>\n",
       "      <th>Cat_Data</th>\n",
       "      <th>Cat_Economic</th>\n",
       "      <th>Citations</th>\n",
       "      <th>Paper</th>\n",
       "      <th>rough</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>High52</td>\n",
       "      <td>52 week high</td>\n",
       "      <td>52 Week High</td>\n",
       "      <td>Price</td>\n",
       "      <td>Momentum</td>\n",
       "      <td>893.00</td>\n",
       "      <td>George and Hwang 2004 JF</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Residualmomentum6M</td>\n",
       "      <td>6 month residual momentum</td>\n",
       "      <td>6 Month Residual Momentum</td>\n",
       "      <td>Price</td>\n",
       "      <td>Momentum</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Blitz, Huij and Martens 2011 JEmpFin</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Abnormalaccruals</td>\n",
       "      <td>Abnormal Accruals</td>\n",
       "      <td>Abnormal Accruals</td>\n",
       "      <td>Accounting</td>\n",
       "      <td>Accruals</td>\n",
       "      <td>1727.00</td>\n",
       "      <td>Xie 2001 AR</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Delayacct</td>\n",
       "      <td>Accounting component of price delay</td>\n",
       "      <td>Accounting Component Of Price Delay</td>\n",
       "      <td>Accounting</td>\n",
       "      <td>Lead Lag</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Callen, Khan and Lu 2013 CAR</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Accrualquality</td>\n",
       "      <td>Accrual Quality</td>\n",
       "      <td>Accrual Quality</td>\n",
       "      <td>Accounting</td>\n",
       "      <td>Accruals</td>\n",
       "      <td>4380.00</td>\n",
       "      <td>Francis, LaFond, Olsson, Schipper 2005 JAE</td>\n",
       "      <td>1.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          description                              Anomaly  \\\n",
       "0              High52                         52 week high   \n",
       "1  Residualmomentum6M            6 month residual momentum   \n",
       "2    Abnormalaccruals                    Abnormal Accruals   \n",
       "3           Delayacct  Accounting component of price delay   \n",
       "4      Accrualquality                      Accrual Quality   \n",
       "\n",
       "                      description_full    Cat_Data Cat_Economic  Citations  \\\n",
       "0                         52 Week High       Price     Momentum     893.00   \n",
       "1            6 Month Residual Momentum       Price     Momentum        NaN   \n",
       "2                    Abnormal Accruals  Accounting     Accruals    1727.00   \n",
       "3  Accounting Component Of Price Delay  Accounting     Lead Lag        NaN   \n",
       "4                      Accrual Quality  Accounting     Accruals    4380.00   \n",
       "\n",
       "                                        Paper  rough  \n",
       "0                    George and Hwang 2004 JF    NaN  \n",
       "1        Blitz, Huij and Martens 2011 JEmpFin    NaN  \n",
       "2                                 Xie 2001 AR    NaN  \n",
       "3                Callen, Khan and Lu 2013 CAR    NaN  \n",
       "4  Francis, LaFond, Olsson, Schipper 2005 JAE   1.00  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def detail_table_func(ins_sa_doc, types_list, short_long_pairs, Buy_Sell_list, V, measure):\n",
    "    ins=pd.DataFrame()\n",
    "    long_short_except=pd.DataFrame()\n",
    "    for i in range(len(types_list)):\n",
    "        _tmp_path=os.path.join(V,Buy_Sell_list[i],ins_sa_doc[i]+'.csv')\n",
    "        p01=pd.read_csv(_tmp_path)\n",
    "        p01=p01[p01['type'].isin(['n',types_list[i]])].drop_duplicates()\n",
    "        try:\n",
    "            p02=pd.read_csv(re.sub(r'_anomaly(?!.*_anomaly)', '_anomaly2', _tmp_path).replace('_modal','_modal2'))\n",
    "            p02=p02[p02['type'].isin(['n',types_list[i]])].drop_duplicates()\n",
    "            p=pd.merge(p01,p02,on=['description', 'type','level_1'],how='outer')\n",
    "        except:\n",
    "            p=p01\n",
    "            p['long_leg']=np.nan\n",
    "            p['all_except_long']=np.nan\n",
    "        p=p[p['type'].isin([types_list[i],'n'])]\n",
    "        p['type']=np.where(p['type']=='n',types_list[i],p['type'])\n",
    "        p['description']=[x.title() for x in p['description']]\n",
    "\n",
    "        leg1=short_long_pairs[i][0]\n",
    "        leg2=short_long_pairs[i][1]\n",
    "\n",
    "        _p1=p[p['level_1']=='mean']\n",
    "        _p1['diff_mean']=_p1[leg1]-_p1[leg2]\n",
    "        _p1['delta_mean']=(_p1[leg1]-_p1[leg2])/_p1[leg2]\n",
    "        _p2=p[p['level_1']=='std']\n",
    "        _p2['stdev1']=_p2[leg1]*_p2[leg1]\n",
    "        _p2['stdev2']=_p2[leg2]*_p2[leg2]\n",
    "        _p3=p[p['level_1']=='count']\n",
    "        _p3['n1']=_p3[leg1]\n",
    "        _p3['n2']=_p3[leg2]\n",
    "\n",
    "        p2=pd.merge(_p1[['description', 'type','diff_mean', 'delta_mean',leg1,leg2]],_p2[['description',\n",
    "                        'type','stdev1','stdev2']],on=['description', 'type'],how='outer')\n",
    "        p2=pd.merge(p2,_p3[['description','n1','n2']],on='description',how='outer')\n",
    "        p2['all_leg']=(p2[leg1]*p2['n1']+p2[leg2]*p2['n2'])/(p2['n1']+p2['n2'])\n",
    "        p2['delta_mean_adj']=(p2[leg1]-p2[leg2])/p2['all_leg']\n",
    "        ins=pd.concat([ins,p2]).sort_values(['description','type'])\n",
    "        \n",
    "        if measure==2:\n",
    "            ins['delta_mean']=ins['delta_mean_adj']\n",
    "        \n",
    "        _long_short_except=_p1[['description','short_leg','all_except_short','long_leg','all_except_long']]\n",
    "        _long_short_except.columns=[x+'_'+types_list[i]  if '_leg' in x or 'all_except' in x else x for x in _long_short_except.columns]\n",
    "        n_long_short_except=_p3[['description','short_leg','all_except_short','long_leg','all_except_long']]\n",
    "        n_long_short_except.columns=['n_'+x+'_'+types_list[i]  if '_leg' in x or 'all_except' in x else x for x in n_long_short_except.columns]\n",
    "        _long_short_except=pd.merge(n_long_short_except,_long_short_except,on='description',how='outer')\n",
    "        try:\n",
    "            long_short_except=pd.merge(long_short_except,_long_short_except,on='description',how='outer')\n",
    "        except:\n",
    "            long_short_except=_long_short_except.copy()\n",
    "    long_short_except.columns=['description']+[x+'_'+ins_sa_doc[0].split('_')[0].strip('0123456789') for x in long_short_except.columns[1:]]\n",
    "\n",
    "    ins['stderr']=np.sqrt(ins['stdev1']/ins['n1'] + ins['stdev2']/ins['n2'])\n",
    "    ins['t']=ins['diff_mean']/ins['stderr']\n",
    "    ins['I']=np.where(ins['t']>1.96,1,0)\n",
    "    ins['delta_mean2']=ins['delta_mean']*ins['I']\n",
    "    \n",
    "    ins2=ins.sort_values(['description','delta_mean2','t'],ascending=[True,False,False])\n",
    "    final=ins2[ins2['t']>1.96].groupby('description')['type'].first().reset_index().rename(columns={'type':'final'})\n",
    "\n",
    "    ins2['delta_mean2']=ins2['delta_mean']\n",
    "    ins_output=pd.pivot_table(ins2[['description', 'type','delta_mean2','t']],index=['description'],\n",
    "                    columns=['type'],values=['delta_mean2','t']).reset_index()\n",
    "    ins_output_vars=[('description',   '')]\n",
    "    ins_output_vars_new=['description']\n",
    "    for i in range(len(types_list)):\n",
    "        ins_output_vars.extend([('delta_mean2', types_list[i]),(          't', types_list[i])]) \n",
    "        ins_output_vars_new.extend([types_list[i], types_list[i]+'_t']) \n",
    "    ins_output=ins_output[ins_output_vars]\n",
    "    ins_output.columns=ins_output_vars_new   \n",
    "\n",
    "    ins_output1=ins_output.copy()\n",
    "    for _var in types_list:\n",
    "        ins_output1[_var]=[round(x,4) for x in ins_output1[_var]]\n",
    "        ins_output1[_var]=np.where( abs(ins_output1[_var+'_t'])<1.96,np.nan,ins_output1[_var])\n",
    "        \n",
    "        ins_output1[_var+'_t']=np.where( abs(ins_output1[_var+'_t'])<1.96,-99,ins_output1[_var+'_t'])\n",
    "        ins_output1[_var+'_t']=['('+'{:.2f}'.format(round(x,2))+')' if x!=-99 else '' for x in ins_output1[_var+'_t']]\n",
    "\n",
    "    ins_output1=pd.merge(ins_output1,final,on='description',how='left') \n",
    "    ins_output1['final']=np.where(ins_output1[types_list].max(axis=1)<=0,'none',ins_output1['final'])  #.sum(axis=1)==0\n",
    "    ins_output1['final']=ins_output1['final'].fillna('none')\n",
    "    ins_output1.columns=['description']+[x+'_'+ins_sa_doc[0].split('_')[0].strip('0123456789') for x in ins_output1.columns[1:]]\n",
    "    return ins_output1,long_short_except\n",
    "\n",
    "def ins_sa_table_func(ins_sa_doc, types_list, short_long_pairs, Buy_Sell_list, V,measure=1):\n",
    "    print('Input Variables: ', ins_sa_doc, types_list, short_long_pairs)\n",
    "    _ins_sa_doc=[x.replace('ins','sa') for x in ins_sa_doc]\n",
    "    try:\n",
    "        ins_result,long_short_except_ins=detail_table_func(ins_sa_doc, types_list, short_long_pairs, Buy_Sell_list, V,measure)\n",
    "        ins_empty=0\n",
    "    except:\n",
    "        ins_empty=1\n",
    "    try:\n",
    "        sa_result,long_short_except_sa=detail_table_func(_ins_sa_doc, types_list, short_long_pairs, Buy_Sell_list, V,measure)\n",
    "        if ins_empty==1:\n",
    "            ins_result=sa_result[['description']]\n",
    "            long_short_except_ins=long_short_except_sa[['description']]\n",
    "    except:\n",
    "        sa_result=ins_result[['description']]\n",
    "        long_short_except_sa=long_short_except_ins[['description']]\n",
    "        \n",
    "    ins_sa_result=pd.merge(ins_result,sa_result,on='description',how='outer')\n",
    "    ins_sa_result['description']=[x.title() for x in ins_sa_result['description']]\n",
    "    table=pd.merge(paper_list,ins_sa_result,on='description',how='inner').drop(['Cat_Data'],axis=1).sort_values('description').reset_index(drop=True)\n",
    "    return table\n",
    "\n",
    "\n",
    "def show_summary_table(table_list,save_name):\n",
    "    ins_table0=pd.DataFrame()\n",
    "    sa_table0=pd.DataFrame()\n",
    "    types_list_all=[]\n",
    "    for  t1 in table_list:\n",
    "        table_name=t1['table_name'].iloc[0]\n",
    "        try:\n",
    "            if len(t1[t1['final_ins'].notnull()]):\n",
    "                count_vars=[re.sub('_ins$','',x) for x in t1.columns if x[-4:]=='_ins' and '_t_' not in x and 'final_' not in x]\n",
    "                types_list_all=types_list_all+[x for x in count_vars if x not in types_list_all]\n",
    "                _ins=pd.DataFrame([t1[t1[x+'_ins']>0][x+'_ins'].count() for x in count_vars]).T \n",
    "                _ins.columns=count_vars\n",
    "                _ins['none']=len(t1[t1['final_ins']=='none'])\n",
    "                _ins['N']=len(t1[(t1['final_ins']!='')&t1['final_ins'].notnull()])\n",
    "                _ins['most_consistent']=0\n",
    "                _ins['table_name']=t1['table_name']\n",
    "                _ins.loc[1]=[len(t1[t1['final_ins']==x]) for x in count_vars]+[_ins['none'].loc[0],_ins['N'].loc[0],1,table_name]\n",
    "                ins_table0=pd.concat([ins_table0,_ins])\n",
    "        except:\n",
    "            pass\n",
    "        try:   \n",
    "            if len(t1[t1['final_sa'].notnull()]):\n",
    "                count_vars=[re.sub('_sa$','',x) for x in t1.columns if x[-3:]=='_sa' and '_t_' not in x and 'final_' not in x]\n",
    "                types_list_all=types_list_all+[x for x in count_vars if x not in types_list_all]\n",
    "                _sa=pd.DataFrame([t1[t1[x+'_sa']>0][x+'_sa'].count() for x in count_vars]).T \n",
    "                _sa.columns=count_vars\n",
    "                _sa['none']=len(t1[t1['final_sa']=='none'])\n",
    "                _sa['N']=len(t1[(t1['final_sa']!='')&t1['final_sa'].notnull()])\n",
    "                _sa['most_consistent']=0\n",
    "                _sa['table_name']=t1['table_name']\n",
    "                _sa.loc[1]= [len(t1[t1['final_sa']==x]) for x in count_vars]+[_sa['none'].loc[0],_sa['N'].loc[0],1,table_name]\n",
    "                sa_table0=pd.concat([sa_table0,_sa])# sa_table0.sort_values(['delete_word','most_consistent'])\n",
    "        except:\n",
    "            pass\n",
    "\n",
    "    types_list_all=types_list_all+['none'] \n",
    "    ins_table=ins_table0.copy()\n",
    "    sa_table=sa_table0.copy()\n",
    "    for _var in types_list_all:\n",
    "        try:\n",
    "            ins_table[_var]=ins_table[_var]/ins_table['N']\n",
    "        except:\n",
    "            pass\n",
    "        try:\n",
    "            sa_table[_var]=sa_table[_var]/sa_table['N']\n",
    "        except:\n",
    "            pass\n",
    "    ins_table['data']='Analyst Reports'  \n",
    "    sa_table['data']='SA'    \n",
    "    ins_sa_table=pd.concat([ins_table,sa_table])\n",
    "#     ins_sa_table=ins_sa_table[['table_name','data','most_consistent']+types_list_all]\n",
    "    ins_sa_table=ins_sa_table[['table_name','data','most_consistent']+types_list_all]\n",
    "    ins_sa_table=ins_sa_table.sort_values(['table_name','data','most_consistent'])\n",
    "    for _var in types_list_all:\n",
    "        ins_sa_table[_var]=['{:.0%}'.format(x)  for x in ins_sa_table[_var]]\n",
    "    ins_sa_table.to_excel('results/'+save_name+'.xlsx',index=False)\n",
    "    return ins_sa_table\n",
    "\n",
    "paper_list=pd.read_excel('data/paper_citation_list.xlsx').rename(columns={'description':'description_full', 'Acronym':'description'})\n",
    "paper_list['description']=[x.title() for x in paper_list['description']]\n",
    "paper_list['description_full']=[x.title() for x in paper_list['description_full']]\n",
    "paper_list['Cat_Economic']=[x.title() for x in paper_list['Cat_Economic']]\n",
    "print(len(paper_list))\n",
    "paper_list[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "1b6e76f5",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:17:29.924950Z",
     "start_time": "2025-06-30T18:17:29.746830Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Input Variables:  ['ins_anomaly_1', 'ins_anomaly_1', 'ins_anomaly_1'] ['safe', 'supremacy', 'lottery'] [('short_leg', 'all_except_short'), ('short_leg', 'all_except_short'), ('short_leg', 'all_except_short')]\n",
      "Input Variables:  ['ins_anomaly_2', 'ins_anomaly_2', 'ins_anomaly_2'] ['safe', 'supremacy', 'lottery'] [('short_leg', 'all_except_short'), ('short_leg', 'all_except_short'), ('short_leg', 'all_except_short')]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>table_name</th>\n",
       "      <th>data</th>\n",
       "      <th>most_consistent</th>\n",
       "      <th>safe</th>\n",
       "      <th>supremacy</th>\n",
       "      <th>lottery</th>\n",
       "      <th>none</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_2A</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>0</td>\n",
       "      <td>12%</td>\n",
       "      <td>29%</td>\n",
       "      <td>68%</td>\n",
       "      <td>20%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_2A</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>1</td>\n",
       "      <td>8%</td>\n",
       "      <td>17%</td>\n",
       "      <td>55%</td>\n",
       "      <td>20%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_2A</td>\n",
       "      <td>SA</td>\n",
       "      <td>0</td>\n",
       "      <td>7%</td>\n",
       "      <td>16%</td>\n",
       "      <td>57%</td>\n",
       "      <td>30%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_2A</td>\n",
       "      <td>SA</td>\n",
       "      <td>1</td>\n",
       "      <td>6%</td>\n",
       "      <td>11%</td>\n",
       "      <td>54%</td>\n",
       "      <td>30%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_2B</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>0</td>\n",
       "      <td>24%</td>\n",
       "      <td>27%</td>\n",
       "      <td>69%</td>\n",
       "      <td>12%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_2B</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>1</td>\n",
       "      <td>5%</td>\n",
       "      <td>18%</td>\n",
       "      <td>65%</td>\n",
       "      <td>12%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_2B</td>\n",
       "      <td>SA</td>\n",
       "      <td>0</td>\n",
       "      <td>10%</td>\n",
       "      <td>23%</td>\n",
       "      <td>63%</td>\n",
       "      <td>22%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_2B</td>\n",
       "      <td>SA</td>\n",
       "      <td>1</td>\n",
       "      <td>8%</td>\n",
       "      <td>13%</td>\n",
       "      <td>57%</td>\n",
       "      <td>22%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  table_name             data  most_consistent safe supremacy lottery none\n",
       "0   table_2A  Analyst Reports                0  12%       29%     68%  20%\n",
       "1   table_2A  Analyst Reports                1   8%       17%     55%  20%\n",
       "0   table_2A               SA                0   7%       16%     57%  30%\n",
       "1   table_2A               SA                1   6%       11%     54%  30%\n",
       "0   table_2B  Analyst Reports                0  24%       27%     69%  12%\n",
       "1   table_2B  Analyst Reports                1   5%       18%     65%  12%\n",
       "0   table_2B               SA                0  10%       23%     63%  22%\n",
       "1   table_2B               SA                1   8%       13%     57%  22%"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Default Input Variables:\n",
    "ins_sa_doc=['ins_anomaly_1','ins_anomaly_1','ins_anomaly_1']\n",
    "types_list=['safe', 'supremacy', 'lottery']\n",
    "Buy_Sell_list=['Buy','Buy','Buy']\n",
    "short_long_pairs=[('short_leg','all_except_short')]*3\n",
    "\n",
    "table_2A=ins_sa_table_func(ins_sa_doc, types_list, short_long_pairs, Buy_Sell_list, 'V1')\n",
    "table_2A['table_name']='table_2A'\n",
    "\n",
    "table_2B=ins_sa_table_func(['ins_anomaly_2','ins_anomaly_2','ins_anomaly_2'], types_list, short_long_pairs, Buy_Sell_list, 'V1')\n",
    "table_2B['table_name']='table_2B'\n",
    "\n",
    "show_summary_table([table_2A,table_2B],'table_2')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e39efa50",
   "metadata": {},
   "source": [
    "## Table 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "237fdb62",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:17:29.962240Z",
     "start_time": "2025-06-30T18:17:29.926231Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>LongDescription</th>\n",
       "      <th>Cat_Economic</th>\n",
       "      <th>safe_ins</th>\n",
       "      <th>safe_t_ins</th>\n",
       "      <th>supremacy_ins</th>\n",
       "      <th>supremacy_t_ins</th>\n",
       "      <th>lottery_ins</th>\n",
       "      <th>lottery_t_ins</th>\n",
       "      <th>safe_sa</th>\n",
       "      <th>safe_t_sa</th>\n",
       "      <th>supremacy_sa</th>\n",
       "      <th>supremacy_t_sa</th>\n",
       "      <th>lottery_sa</th>\n",
       "      <th>lottery_t_sa</th>\n",
       "      <th>final_ins</th>\n",
       "      <th>final_sa</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>85</th>\n",
       "      <td>High52</td>\n",
       "      <td>52 Week High</td>\n",
       "      <td>Momentum</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.08</td>\n",
       "      <td>(18.82)</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.13</td>\n",
       "      <td>(8.48)</td>\n",
       "      <td>lottery</td>\n",
       "      <td>lottery</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abnormalaccruals</td>\n",
       "      <td>Abnormal Accruals</td>\n",
       "      <td>Accruals</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.10</td>\n",
       "      <td>(19.16)</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.13</td>\n",
       "      <td>(6.35)</td>\n",
       "      <td>lottery</td>\n",
       "      <td>lottery</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Accruals</td>\n",
       "      <td>Accruals</td>\n",
       "      <td>Accruals</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.06</td>\n",
       "      <td>(12.45)</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.11</td>\n",
       "      <td>(5.06)</td>\n",
       "      <td>lottery</td>\n",
       "      <td>lottery</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Activism2</td>\n",
       "      <td>Active Shareholders</td>\n",
       "      <td>Ownership</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Adexp</td>\n",
       "      <td>Advertising Expense</td>\n",
       "      <td>R&amp;D</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.16</td>\n",
       "      <td>(36.87)</td>\n",
       "      <td>0.06</td>\n",
       "      <td>(11.16)</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.08</td>\n",
       "      <td>(3.57)</td>\n",
       "      <td>supremacy</td>\n",
       "      <td>lottery</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>Chnwc</td>\n",
       "      <td>Δ Net Working Capital</td>\n",
       "      <td>Investment Alt</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.10</td>\n",
       "      <td>(21.31)</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.14</td>\n",
       "      <td>(7.26)</td>\n",
       "      <td>lottery</td>\n",
       "      <td>lottery</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>Orderbacklogchg</td>\n",
       "      <td>Δ Order Backlog</td>\n",
       "      <td>Accruals</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>Investppeinv</td>\n",
       "      <td>Δ Ppe and Inv/Assets</td>\n",
       "      <td>Investment</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.03</td>\n",
       "      <td>(7.84)</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>lottery</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>Changeinrecommendation</td>\n",
       "      <td>Δ Recommendation</td>\n",
       "      <td>Recommendation</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>Chtax</td>\n",
       "      <td>Δ Taxes</td>\n",
       "      <td>Other</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.03</td>\n",
       "      <td>(7.47)</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.04</td>\n",
       "      <td>(2.62)</td>\n",
       "      <td>lottery</td>\n",
       "      <td>lottery</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>186 rows × 17 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                description        LongDescription    Cat_Economic safe_ins  \\\n",
       "85                   High52           52 Week High        Momentum            \n",
       "0          Abnormalaccruals      Abnormal Accruals        Accruals            \n",
       "1                  Accruals               Accruals        Accruals            \n",
       "3                 Activism2    Active Shareholders       Ownership            \n",
       "4                     Adexp    Advertising Expense             R&D            \n",
       "..                      ...                    ...             ...      ...   \n",
       "35                    Chnwc  Δ Net Working Capital  Investment Alt            \n",
       "135         Orderbacklogchg        Δ Order Backlog        Accruals            \n",
       "99             Investppeinv   Δ Ppe and Inv/Assets      Investment            \n",
       "28   Changeinrecommendation       Δ Recommendation  Recommendation            \n",
       "36                    Chtax                Δ Taxes           Other            \n",
       "\n",
       "    safe_t_ins supremacy_ins supremacy_t_ins lottery_ins lottery_t_ins  \\\n",
       "85                                                  0.08       (18.82)   \n",
       "0                                                   0.10       (19.16)   \n",
       "1                                                   0.06       (12.45)   \n",
       "3                                                                        \n",
       "4                       0.16         (36.87)        0.06       (11.16)   \n",
       "..         ...           ...             ...         ...           ...   \n",
       "35                                                  0.10       (21.31)   \n",
       "135                                                                      \n",
       "99                                                  0.03        (7.84)   \n",
       "28                                                                       \n",
       "36                                                  0.03        (7.47)   \n",
       "\n",
       "    safe_sa safe_t_sa supremacy_sa supremacy_t_sa lottery_sa lottery_t_sa  \\\n",
       "85                                                      0.13       (8.48)   \n",
       "0                                                       0.13       (6.35)   \n",
       "1                                                       0.11       (5.06)   \n",
       "3                                                                           \n",
       "4                                                       0.08       (3.57)   \n",
       "..      ...       ...          ...            ...        ...          ...   \n",
       "35                                                      0.14       (7.26)   \n",
       "135                                                                         \n",
       "99                                                                          \n",
       "28                                                                          \n",
       "36                                                      0.04       (2.62)   \n",
       "\n",
       "     final_ins final_sa  \n",
       "85     lottery  lottery  \n",
       "0      lottery  lottery  \n",
       "1      lottery  lottery  \n",
       "3         none     none  \n",
       "4    supremacy  lottery  \n",
       "..         ...      ...  \n",
       "35     lottery  lottery  \n",
       "135       none     none  \n",
       "99     lottery     none  \n",
       "28        none     none  \n",
       "36     lottery  lottery  \n",
       "\n",
       "[186 rows x 17 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "main=table_2A[['description','safe_ins', 'safe_t_ins', 'supremacy_ins','supremacy_t_ins', 'lottery_ins', 'lottery_t_ins',  'safe_sa','safe_t_sa',\n",
    "               'supremacy_sa', 'supremacy_t_sa', 'lottery_sa', 'lottery_t_sa','final_ins','final_sa' ,]]\n",
    "for _var in ['safe','supremacy','lottery']:\n",
    "    main.loc[main[_var+'_ins']<0,_var+'_t_ins']=np.nan\n",
    "    main.loc[main[_var+'_ins']<0,_var+'_ins']=np.nan\n",
    "    main.loc[main[_var+'_sa']<0,_var+'_t_sa']=np.nan\n",
    "    main.loc[main[_var+'_sa']<0,_var+'_sa']=np.nan\n",
    "    \n",
    "Cat_Economic=pd.read_csv('data/SignalDoc2.csv')[['Acronym','LongDescription','Cat_Economic']]  \n",
    "for _var in ['Acronym','LongDescription','Cat_Economic']:\n",
    "    Cat_Economic[_var]=[x.title() for x in Cat_Economic[_var]]\n",
    "Cat_Economic['LongDescription']=[x.replace(' To ',' to ').replace(' In ',' in ').replace(' Of ',' of ').replace(' On ',' on ').replace(' And ',' and ').replace(' With',' with').replace('Eps ','EPS ')\n",
    "                              for x in Cat_Economic['LongDescription']]\n",
    "main2=pd.merge(Cat_Economic.rename(columns={'Acronym':'description'}),main,on=['description'],how='right')\n",
    "main2['LongDescription']=[x.replace('Change in ','Δ ') for x in main2['LongDescription']]\n",
    "main2=main2.sort_values('LongDescription')\n",
    "main2.to_excel('results/Online_Appendix_Table_A4.xlsx',index=False)\n",
    "main2.replace(np.nan,'')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a98f8e6a",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:17:30.018417Z",
     "start_time": "2025-06-30T18:17:29.963141Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "</style>\n",
       "<table id=\"T_03ae6\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_03ae6_level0_col0\" class=\"col_heading level0 col0\" >Cat_Economic</th>\n",
       "      <th id=\"T_03ae6_level0_col1\" class=\"col_heading level0 col1\" >name_Analyst Reports</th>\n",
       "      <th id=\"T_03ae6_level0_col2\" class=\"col_heading level0 col2\" >safe_Analyst Reports</th>\n",
       "      <th id=\"T_03ae6_level0_col3\" class=\"col_heading level0 col3\" >supremacy_Analyst Reports</th>\n",
       "      <th id=\"T_03ae6_level0_col4\" class=\"col_heading level0 col4\" >lottery_Analyst Reports</th>\n",
       "      <th id=\"T_03ae6_level0_col5\" class=\"col_heading level0 col5\" >none_Analyst Reports</th>\n",
       "      <th id=\"T_03ae6_level0_col6\" class=\"col_heading level0 col6\" >safe_SA</th>\n",
       "      <th id=\"T_03ae6_level0_col7\" class=\"col_heading level0 col7\" >supremacy_SA</th>\n",
       "      <th id=\"T_03ae6_level0_col8\" class=\"col_heading level0 col8\" >lottery_SA</th>\n",
       "      <th id=\"T_03ae6_level0_col9\" class=\"col_heading level0 col9\" >none_SA</th>\n",
       "      <th id=\"T_03ae6_level0_col10\" class=\"col_heading level0 col10\" >name_SA</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
       "      <td id=\"T_03ae6_row0_col0\" class=\"data row0 col0\" >Accruals</td>\n",
       "      <td id=\"T_03ae6_row0_col1\" class=\"data row0 col1\" >Accruals [5]</td>\n",
       "      <td id=\"T_03ae6_row0_col2\" class=\"data row0 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row0_col3\" class=\"data row0 col3\" >20%</td>\n",
       "      <td id=\"T_03ae6_row0_col4\" class=\"data row0 col4\" >40%</td>\n",
       "      <td id=\"T_03ae6_row0_col5\" class=\"data row0 col5\" >40%</td>\n",
       "      <td id=\"T_03ae6_row0_col6\" class=\"data row0 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row0_col7\" class=\"data row0 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row0_col8\" class=\"data row0 col8\" >60%</td>\n",
       "      <td id=\"T_03ae6_row0_col9\" class=\"data row0 col9\" >40%</td>\n",
       "      <td id=\"T_03ae6_row0_col10\" class=\"data row0 col10\" >Accruals [5]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
       "      <td id=\"T_03ae6_row1_col0\" class=\"data row1 col0\" >Asset Composition</td>\n",
       "      <td id=\"T_03ae6_row1_col1\" class=\"data row1 col1\" >Asset Composition [4]</td>\n",
       "      <td id=\"T_03ae6_row1_col2\" class=\"data row1 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row1_col3\" class=\"data row1 col3\" >25%</td>\n",
       "      <td id=\"T_03ae6_row1_col4\" class=\"data row1 col4\" >50%</td>\n",
       "      <td id=\"T_03ae6_row1_col5\" class=\"data row1 col5\" >25%</td>\n",
       "      <td id=\"T_03ae6_row1_col6\" class=\"data row1 col6\" >50%</td>\n",
       "      <td id=\"T_03ae6_row1_col7\" class=\"data row1 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row1_col8\" class=\"data row1 col8\" >25%</td>\n",
       "      <td id=\"T_03ae6_row1_col9\" class=\"data row1 col9\" >25%</td>\n",
       "      <td id=\"T_03ae6_row1_col10\" class=\"data row1 col10\" >Asset Composition [4]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
       "      <td id=\"T_03ae6_row2_col0\" class=\"data row2 col0\" >Cash Flow Risk</td>\n",
       "      <td id=\"T_03ae6_row2_col1\" class=\"data row2 col1\" >Cash Flow Risk [1]</td>\n",
       "      <td id=\"T_03ae6_row2_col2\" class=\"data row2 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row2_col3\" class=\"data row2 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row2_col4\" class=\"data row2 col4\" >100%</td>\n",
       "      <td id=\"T_03ae6_row2_col5\" class=\"data row2 col5\" >0%</td>\n",
       "      <td id=\"T_03ae6_row2_col6\" class=\"data row2 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row2_col7\" class=\"data row2 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row2_col8\" class=\"data row2 col8\" >100%</td>\n",
       "      <td id=\"T_03ae6_row2_col9\" class=\"data row2 col9\" >0%</td>\n",
       "      <td id=\"T_03ae6_row2_col10\" class=\"data row2 col10\" >Cash Flow Risk [1]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n",
       "      <td id=\"T_03ae6_row3_col0\" class=\"data row3 col0\" >Composite Accounting</td>\n",
       "      <td id=\"T_03ae6_row3_col1\" class=\"data row3 col1\" >Composite Accounting [5]</td>\n",
       "      <td id=\"T_03ae6_row3_col2\" class=\"data row3 col2\" >20%</td>\n",
       "      <td id=\"T_03ae6_row3_col3\" class=\"data row3 col3\" >20%</td>\n",
       "      <td id=\"T_03ae6_row3_col4\" class=\"data row3 col4\" >40%</td>\n",
       "      <td id=\"T_03ae6_row3_col5\" class=\"data row3 col5\" >20%</td>\n",
       "      <td id=\"T_03ae6_row3_col6\" class=\"data row3 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row3_col7\" class=\"data row3 col7\" >20%</td>\n",
       "      <td id=\"T_03ae6_row3_col8\" class=\"data row3 col8\" >0%</td>\n",
       "      <td id=\"T_03ae6_row3_col9\" class=\"data row3 col9\" >80%</td>\n",
       "      <td id=\"T_03ae6_row3_col10\" class=\"data row3 col10\" >Composite Accounting [5]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row4\" class=\"row_heading level0 row4\" >4</th>\n",
       "      <td id=\"T_03ae6_row4_col0\" class=\"data row4 col0\" >Default Risk</td>\n",
       "      <td id=\"T_03ae6_row4_col1\" class=\"data row4 col1\" >Default Risk [1]</td>\n",
       "      <td id=\"T_03ae6_row4_col2\" class=\"data row4 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row4_col3\" class=\"data row4 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row4_col4\" class=\"data row4 col4\" >100%</td>\n",
       "      <td id=\"T_03ae6_row4_col5\" class=\"data row4 col5\" >0%</td>\n",
       "      <td id=\"T_03ae6_row4_col6\" class=\"data row4 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row4_col7\" class=\"data row4 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row4_col8\" class=\"data row4 col8\" >100%</td>\n",
       "      <td id=\"T_03ae6_row4_col9\" class=\"data row4 col9\" >0%</td>\n",
       "      <td id=\"T_03ae6_row4_col10\" class=\"data row4 col10\" >Default Risk [1]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row5\" class=\"row_heading level0 row5\" >5</th>\n",
       "      <td id=\"T_03ae6_row5_col0\" class=\"data row5 col0\" >Earnings Event</td>\n",
       "      <td id=\"T_03ae6_row5_col1\" class=\"data row5 col1\" >Earnings Event [2]</td>\n",
       "      <td id=\"T_03ae6_row5_col2\" class=\"data row5 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row5_col3\" class=\"data row5 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row5_col4\" class=\"data row5 col4\" >50%</td>\n",
       "      <td id=\"T_03ae6_row5_col5\" class=\"data row5 col5\" >50%</td>\n",
       "      <td id=\"T_03ae6_row5_col6\" class=\"data row5 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row5_col7\" class=\"data row5 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row5_col8\" class=\"data row5 col8\" >50%</td>\n",
       "      <td id=\"T_03ae6_row5_col9\" class=\"data row5 col9\" >50%</td>\n",
       "      <td id=\"T_03ae6_row5_col10\" class=\"data row5 col10\" >Earnings Event [2]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row6\" class=\"row_heading level0 row6\" >6</th>\n",
       "      <td id=\"T_03ae6_row6_col0\" class=\"data row6 col0\" >Earnings Forecast</td>\n",
       "      <td id=\"T_03ae6_row6_col1\" class=\"data row6 col1\" >Earnings Forecast [5]</td>\n",
       "      <td id=\"T_03ae6_row6_col2\" class=\"data row6 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row6_col3\" class=\"data row6 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row6_col4\" class=\"data row6 col4\" >80%</td>\n",
       "      <td id=\"T_03ae6_row6_col5\" class=\"data row6 col5\" >20%</td>\n",
       "      <td id=\"T_03ae6_row6_col6\" class=\"data row6 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row6_col7\" class=\"data row6 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row6_col8\" class=\"data row6 col8\" >100%</td>\n",
       "      <td id=\"T_03ae6_row6_col9\" class=\"data row6 col9\" >0%</td>\n",
       "      <td id=\"T_03ae6_row6_col10\" class=\"data row6 col10\" >Earnings Forecast [5]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row7\" class=\"row_heading level0 row7\" >7</th>\n",
       "      <td id=\"T_03ae6_row7_col0\" class=\"data row7 col0\" >Earnings Growth</td>\n",
       "      <td id=\"T_03ae6_row7_col1\" class=\"data row7 col1\" >Earnings Growth [3]</td>\n",
       "      <td id=\"T_03ae6_row7_col2\" class=\"data row7 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row7_col3\" class=\"data row7 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row7_col4\" class=\"data row7 col4\" >67%</td>\n",
       "      <td id=\"T_03ae6_row7_col5\" class=\"data row7 col5\" >33%</td>\n",
       "      <td id=\"T_03ae6_row7_col6\" class=\"data row7 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row7_col7\" class=\"data row7 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row7_col8\" class=\"data row7 col8\" >67%</td>\n",
       "      <td id=\"T_03ae6_row7_col9\" class=\"data row7 col9\" >33%</td>\n",
       "      <td id=\"T_03ae6_row7_col10\" class=\"data row7 col10\" >Earnings Growth [3]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row8\" class=\"row_heading level0 row8\" >8</th>\n",
       "      <td id=\"T_03ae6_row8_col0\" class=\"data row8 col0\" >External Financing</td>\n",
       "      <td id=\"T_03ae6_row8_col1\" class=\"data row8 col1\" >External Financing [11]</td>\n",
       "      <td id=\"T_03ae6_row8_col2\" class=\"data row8 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row8_col3\" class=\"data row8 col3\" >18%</td>\n",
       "      <td id=\"T_03ae6_row8_col4\" class=\"data row8 col4\" >82%</td>\n",
       "      <td id=\"T_03ae6_row8_col5\" class=\"data row8 col5\" >0%</td>\n",
       "      <td id=\"T_03ae6_row8_col6\" class=\"data row8 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row8_col7\" class=\"data row8 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row8_col8\" class=\"data row8 col8\" >82%</td>\n",
       "      <td id=\"T_03ae6_row8_col9\" class=\"data row8 col9\" >18%</td>\n",
       "      <td id=\"T_03ae6_row8_col10\" class=\"data row8 col10\" >External Financing [11]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row9\" class=\"row_heading level0 row9\" >9</th>\n",
       "      <td id=\"T_03ae6_row9_col0\" class=\"data row9 col0\" >Info Proxy</td>\n",
       "      <td id=\"T_03ae6_row9_col1\" class=\"data row9 col1\" >Info Proxy [1]</td>\n",
       "      <td id=\"T_03ae6_row9_col2\" class=\"data row9 col2\" >100%</td>\n",
       "      <td id=\"T_03ae6_row9_col3\" class=\"data row9 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row9_col4\" class=\"data row9 col4\" >0%</td>\n",
       "      <td id=\"T_03ae6_row9_col5\" class=\"data row9 col5\" >0%</td>\n",
       "      <td id=\"T_03ae6_row9_col6\" class=\"data row9 col6\" >100%</td>\n",
       "      <td id=\"T_03ae6_row9_col7\" class=\"data row9 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row9_col8\" class=\"data row9 col8\" >0%</td>\n",
       "      <td id=\"T_03ae6_row9_col9\" class=\"data row9 col9\" >0%</td>\n",
       "      <td id=\"T_03ae6_row9_col10\" class=\"data row9 col10\" >Info Proxy [1]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row10\" class=\"row_heading level0 row10\" >10</th>\n",
       "      <td id=\"T_03ae6_row10_col0\" class=\"data row10 col0\" >Investment</td>\n",
       "      <td id=\"T_03ae6_row10_col1\" class=\"data row10 col1\" >Investment [18]</td>\n",
       "      <td id=\"T_03ae6_row10_col2\" class=\"data row10 col2\" >6%</td>\n",
       "      <td id=\"T_03ae6_row10_col3\" class=\"data row10 col3\" >17%</td>\n",
       "      <td id=\"T_03ae6_row10_col4\" class=\"data row10 col4\" >72%</td>\n",
       "      <td id=\"T_03ae6_row10_col5\" class=\"data row10 col5\" >6%</td>\n",
       "      <td id=\"T_03ae6_row10_col6\" class=\"data row10 col6\" >6%</td>\n",
       "      <td id=\"T_03ae6_row10_col7\" class=\"data row10 col7\" >11%</td>\n",
       "      <td id=\"T_03ae6_row10_col8\" class=\"data row10 col8\" >67%</td>\n",
       "      <td id=\"T_03ae6_row10_col9\" class=\"data row10 col9\" >17%</td>\n",
       "      <td id=\"T_03ae6_row10_col10\" class=\"data row10 col10\" >Investment [18]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row11\" class=\"row_heading level0 row11\" >11</th>\n",
       "      <td id=\"T_03ae6_row11_col0\" class=\"data row11 col0\" >Investment Growth</td>\n",
       "      <td id=\"T_03ae6_row11_col1\" class=\"data row11 col1\" >Investment Growth [3]</td>\n",
       "      <td id=\"T_03ae6_row11_col2\" class=\"data row11 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row11_col3\" class=\"data row11 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row11_col4\" class=\"data row11 col4\" >100%</td>\n",
       "      <td id=\"T_03ae6_row11_col5\" class=\"data row11 col5\" >0%</td>\n",
       "      <td id=\"T_03ae6_row11_col6\" class=\"data row11 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row11_col7\" class=\"data row11 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row11_col8\" class=\"data row11 col8\" >100%</td>\n",
       "      <td id=\"T_03ae6_row11_col9\" class=\"data row11 col9\" >0%</td>\n",
       "      <td id=\"T_03ae6_row11_col10\" class=\"data row11 col10\" >Investment Growth [3]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row12\" class=\"row_heading level0 row12\" >12</th>\n",
       "      <td id=\"T_03ae6_row12_col0\" class=\"data row12 col0\" >Lead Lag</td>\n",
       "      <td id=\"T_03ae6_row12_col1\" class=\"data row12 col1\" >Lead Lag [9]</td>\n",
       "      <td id=\"T_03ae6_row12_col2\" class=\"data row12 col2\" >22%</td>\n",
       "      <td id=\"T_03ae6_row12_col3\" class=\"data row12 col3\" >11%</td>\n",
       "      <td id=\"T_03ae6_row12_col4\" class=\"data row12 col4\" >11%</td>\n",
       "      <td id=\"T_03ae6_row12_col5\" class=\"data row12 col5\" >56%</td>\n",
       "      <td id=\"T_03ae6_row12_col6\" class=\"data row12 col6\" >11%</td>\n",
       "      <td id=\"T_03ae6_row12_col7\" class=\"data row12 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row12_col8\" class=\"data row12 col8\" >22%</td>\n",
       "      <td id=\"T_03ae6_row12_col9\" class=\"data row12 col9\" >67%</td>\n",
       "      <td id=\"T_03ae6_row12_col10\" class=\"data row12 col10\" >Lead Lag [9]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row13\" class=\"row_heading level0 row13\" >13</th>\n",
       "      <td id=\"T_03ae6_row13_col0\" class=\"data row13 col0\" >Leverage</td>\n",
       "      <td id=\"T_03ae6_row13_col1\" class=\"data row13 col1\" >Leverage [4]</td>\n",
       "      <td id=\"T_03ae6_row13_col2\" class=\"data row13 col2\" >25%</td>\n",
       "      <td id=\"T_03ae6_row13_col3\" class=\"data row13 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row13_col4\" class=\"data row13 col4\" >25%</td>\n",
       "      <td id=\"T_03ae6_row13_col5\" class=\"data row13 col5\" >50%</td>\n",
       "      <td id=\"T_03ae6_row13_col6\" class=\"data row13 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row13_col7\" class=\"data row13 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row13_col8\" class=\"data row13 col8\" >25%</td>\n",
       "      <td id=\"T_03ae6_row13_col9\" class=\"data row13 col9\" >75%</td>\n",
       "      <td id=\"T_03ae6_row13_col10\" class=\"data row13 col10\" >Leverage [4]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row14\" class=\"row_heading level0 row14\" >14</th>\n",
       "      <td id=\"T_03ae6_row14_col0\" class=\"data row14 col0\" >Liquidity</td>\n",
       "      <td id=\"T_03ae6_row14_col1\" class=\"data row14 col1\" >Liquidity [9]</td>\n",
       "      <td id=\"T_03ae6_row14_col2\" class=\"data row14 col2\" >22%</td>\n",
       "      <td id=\"T_03ae6_row14_col3\" class=\"data row14 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row14_col4\" class=\"data row14 col4\" >67%</td>\n",
       "      <td id=\"T_03ae6_row14_col5\" class=\"data row14 col5\" >11%</td>\n",
       "      <td id=\"T_03ae6_row14_col6\" class=\"data row14 col6\" >11%</td>\n",
       "      <td id=\"T_03ae6_row14_col7\" class=\"data row14 col7\" >11%</td>\n",
       "      <td id=\"T_03ae6_row14_col8\" class=\"data row14 col8\" >56%</td>\n",
       "      <td id=\"T_03ae6_row14_col9\" class=\"data row14 col9\" >22%</td>\n",
       "      <td id=\"T_03ae6_row14_col10\" class=\"data row14 col10\" >Liquidity [9]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row15\" class=\"row_heading level0 row15\" >15</th>\n",
       "      <td id=\"T_03ae6_row15_col0\" class=\"data row15 col0\" >Long Term Reversal</td>\n",
       "      <td id=\"T_03ae6_row15_col1\" class=\"data row15 col1\" >Long Term Reversal [6]</td>\n",
       "      <td id=\"T_03ae6_row15_col2\" class=\"data row15 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row15_col3\" class=\"data row15 col3\" >83%</td>\n",
       "      <td id=\"T_03ae6_row15_col4\" class=\"data row15 col4\" >17%</td>\n",
       "      <td id=\"T_03ae6_row15_col5\" class=\"data row15 col5\" >0%</td>\n",
       "      <td id=\"T_03ae6_row15_col6\" class=\"data row15 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row15_col7\" class=\"data row15 col7\" >50%</td>\n",
       "      <td id=\"T_03ae6_row15_col8\" class=\"data row15 col8\" >33%</td>\n",
       "      <td id=\"T_03ae6_row15_col9\" class=\"data row15 col9\" >17%</td>\n",
       "      <td id=\"T_03ae6_row15_col10\" class=\"data row15 col10\" >Long Term Reversal [6]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row16\" class=\"row_heading level0 row16\" >16</th>\n",
       "      <td id=\"T_03ae6_row16_col0\" class=\"data row16 col0\" >Momentum</td>\n",
       "      <td id=\"T_03ae6_row16_col1\" class=\"data row16 col1\" >Momentum [9]</td>\n",
       "      <td id=\"T_03ae6_row16_col2\" class=\"data row16 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row16_col3\" class=\"data row16 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row16_col4\" class=\"data row16 col4\" >67%</td>\n",
       "      <td id=\"T_03ae6_row16_col5\" class=\"data row16 col5\" >33%</td>\n",
       "      <td id=\"T_03ae6_row16_col6\" class=\"data row16 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row16_col7\" class=\"data row16 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row16_col8\" class=\"data row16 col8\" >56%</td>\n",
       "      <td id=\"T_03ae6_row16_col9\" class=\"data row16 col9\" >44%</td>\n",
       "      <td id=\"T_03ae6_row16_col10\" class=\"data row16 col10\" >Momentum [9]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row17\" class=\"row_heading level0 row17\" >17</th>\n",
       "      <td id=\"T_03ae6_row17_col0\" class=\"data row17 col0\" >Optionrisk</td>\n",
       "      <td id=\"T_03ae6_row17_col1\" class=\"data row17 col1\" >Optionrisk [2]</td>\n",
       "      <td id=\"T_03ae6_row17_col2\" class=\"data row17 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row17_col3\" class=\"data row17 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row17_col4\" class=\"data row17 col4\" >50%</td>\n",
       "      <td id=\"T_03ae6_row17_col5\" class=\"data row17 col5\" >50%</td>\n",
       "      <td id=\"T_03ae6_row17_col6\" class=\"data row17 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row17_col7\" class=\"data row17 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row17_col8\" class=\"data row17 col8\" >100%</td>\n",
       "      <td id=\"T_03ae6_row17_col9\" class=\"data row17 col9\" >0%</td>\n",
       "      <td id=\"T_03ae6_row17_col10\" class=\"data row17 col10\" >Optionrisk [2]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row18\" class=\"row_heading level0 row18\" >18</th>\n",
       "      <td id=\"T_03ae6_row18_col0\" class=\"data row18 col0\" >Other</td>\n",
       "      <td id=\"T_03ae6_row18_col1\" class=\"data row18 col1\" >Other [25]</td>\n",
       "      <td id=\"T_03ae6_row18_col2\" class=\"data row18 col2\" >12%</td>\n",
       "      <td id=\"T_03ae6_row18_col3\" class=\"data row18 col3\" >28%</td>\n",
       "      <td id=\"T_03ae6_row18_col4\" class=\"data row18 col4\" >44%</td>\n",
       "      <td id=\"T_03ae6_row18_col5\" class=\"data row18 col5\" >16%</td>\n",
       "      <td id=\"T_03ae6_row18_col6\" class=\"data row18 col6\" >8%</td>\n",
       "      <td id=\"T_03ae6_row18_col7\" class=\"data row18 col7\" >20%</td>\n",
       "      <td id=\"T_03ae6_row18_col8\" class=\"data row18 col8\" >40%</td>\n",
       "      <td id=\"T_03ae6_row18_col9\" class=\"data row18 col9\" >32%</td>\n",
       "      <td id=\"T_03ae6_row18_col10\" class=\"data row18 col10\" >Other [25]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row19\" class=\"row_heading level0 row19\" >19</th>\n",
       "      <td id=\"T_03ae6_row19_col0\" class=\"data row19 col0\" >Ownership</td>\n",
       "      <td id=\"T_03ae6_row19_col1\" class=\"data row19 col1\" >Ownership [3]</td>\n",
       "      <td id=\"T_03ae6_row19_col2\" class=\"data row19 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row19_col3\" class=\"data row19 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row19_col4\" class=\"data row19 col4\" >33%</td>\n",
       "      <td id=\"T_03ae6_row19_col5\" class=\"data row19 col5\" >67%</td>\n",
       "      <td id=\"T_03ae6_row19_col6\" class=\"data row19 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row19_col7\" class=\"data row19 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row19_col8\" class=\"data row19 col8\" >0%</td>\n",
       "      <td id=\"T_03ae6_row19_col9\" class=\"data row19 col9\" >100%</td>\n",
       "      <td id=\"T_03ae6_row19_col10\" class=\"data row19 col10\" >Ownership [3]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row20\" class=\"row_heading level0 row20\" >20</th>\n",
       "      <td id=\"T_03ae6_row20_col0\" class=\"data row20 col0\" >Payout Indicator</td>\n",
       "      <td id=\"T_03ae6_row20_col1\" class=\"data row20 col1\" >Payout Indicator [1]</td>\n",
       "      <td id=\"T_03ae6_row20_col2\" class=\"data row20 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row20_col3\" class=\"data row20 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row20_col4\" class=\"data row20 col4\" >0%</td>\n",
       "      <td id=\"T_03ae6_row20_col5\" class=\"data row20 col5\" >100%</td>\n",
       "      <td id=\"T_03ae6_row20_col6\" class=\"data row20 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row20_col7\" class=\"data row20 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row20_col8\" class=\"data row20 col8\" >0%</td>\n",
       "      <td id=\"T_03ae6_row20_col9\" class=\"data row20 col9\" >100%</td>\n",
       "      <td id=\"T_03ae6_row20_col10\" class=\"data row20 col10\" >Payout Indicator [1]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row21\" class=\"row_heading level0 row21\" >21</th>\n",
       "      <td id=\"T_03ae6_row21_col0\" class=\"data row21 col0\" >Profitability</td>\n",
       "      <td id=\"T_03ae6_row21_col1\" class=\"data row21 col1\" >Profitability [9]</td>\n",
       "      <td id=\"T_03ae6_row21_col2\" class=\"data row21 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row21_col3\" class=\"data row21 col3\" >11%</td>\n",
       "      <td id=\"T_03ae6_row21_col4\" class=\"data row21 col4\" >89%</td>\n",
       "      <td id=\"T_03ae6_row21_col5\" class=\"data row21 col5\" >0%</td>\n",
       "      <td id=\"T_03ae6_row21_col6\" class=\"data row21 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row21_col7\" class=\"data row21 col7\" >11%</td>\n",
       "      <td id=\"T_03ae6_row21_col8\" class=\"data row21 col8\" >89%</td>\n",
       "      <td id=\"T_03ae6_row21_col9\" class=\"data row21 col9\" >0%</td>\n",
       "      <td id=\"T_03ae6_row21_col10\" class=\"data row21 col10\" >Profitability [9]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row22\" class=\"row_heading level0 row22\" >22</th>\n",
       "      <td id=\"T_03ae6_row22_col0\" class=\"data row22 col0\" >R&D</td>\n",
       "      <td id=\"T_03ae6_row22_col1\" class=\"data row22 col1\" >R&D [4]</td>\n",
       "      <td id=\"T_03ae6_row22_col2\" class=\"data row22 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row22_col3\" class=\"data row22 col3\" >75%</td>\n",
       "      <td id=\"T_03ae6_row22_col4\" class=\"data row22 col4\" >0%</td>\n",
       "      <td id=\"T_03ae6_row22_col5\" class=\"data row22 col5\" >25%</td>\n",
       "      <td id=\"T_03ae6_row22_col6\" class=\"data row22 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row22_col7\" class=\"data row22 col7\" >75%</td>\n",
       "      <td id=\"T_03ae6_row22_col8\" class=\"data row22 col8\" >25%</td>\n",
       "      <td id=\"T_03ae6_row22_col9\" class=\"data row22 col9\" >0%</td>\n",
       "      <td id=\"T_03ae6_row22_col10\" class=\"data row22 col10\" >R&D [4]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row23\" class=\"row_heading level0 row23\" >23</th>\n",
       "      <td id=\"T_03ae6_row23_col0\" class=\"data row23 col0\" >Recommendation</td>\n",
       "      <td id=\"T_03ae6_row23_col1\" class=\"data row23 col1\" >Recommendation [2]</td>\n",
       "      <td id=\"T_03ae6_row23_col2\" class=\"data row23 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row23_col3\" class=\"data row23 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row23_col4\" class=\"data row23 col4\" >0%</td>\n",
       "      <td id=\"T_03ae6_row23_col5\" class=\"data row23 col5\" >100%</td>\n",
       "      <td id=\"T_03ae6_row23_col6\" class=\"data row23 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row23_col7\" class=\"data row23 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row23_col8\" class=\"data row23 col8\" >0%</td>\n",
       "      <td id=\"T_03ae6_row23_col9\" class=\"data row23 col9\" >100%</td>\n",
       "      <td id=\"T_03ae6_row23_col10\" class=\"data row23 col10\" >Recommendation [2]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row24\" class=\"row_heading level0 row24\" >24</th>\n",
       "      <td id=\"T_03ae6_row24_col0\" class=\"data row24 col0\" >Risk</td>\n",
       "      <td id=\"T_03ae6_row24_col1\" class=\"data row24 col1\" >Risk [6]</td>\n",
       "      <td id=\"T_03ae6_row24_col2\" class=\"data row24 col2\" >17%</td>\n",
       "      <td id=\"T_03ae6_row24_col3\" class=\"data row24 col3\" >33%</td>\n",
       "      <td id=\"T_03ae6_row24_col4\" class=\"data row24 col4\" >33%</td>\n",
       "      <td id=\"T_03ae6_row24_col5\" class=\"data row24 col5\" >17%</td>\n",
       "      <td id=\"T_03ae6_row24_col6\" class=\"data row24 col6\" >33%</td>\n",
       "      <td id=\"T_03ae6_row24_col7\" class=\"data row24 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row24_col8\" class=\"data row24 col8\" >67%</td>\n",
       "      <td id=\"T_03ae6_row24_col9\" class=\"data row24 col9\" >0%</td>\n",
       "      <td id=\"T_03ae6_row24_col10\" class=\"data row24 col10\" >Risk [6]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row25\" class=\"row_heading level0 row25\" >25</th>\n",
       "      <td id=\"T_03ae6_row25_col0\" class=\"data row25 col0\" >Sales Growth</td>\n",
       "      <td id=\"T_03ae6_row25_col1\" class=\"data row25 col1\" >Sales Growth [6]</td>\n",
       "      <td id=\"T_03ae6_row25_col2\" class=\"data row25 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row25_col3\" class=\"data row25 col3\" >17%</td>\n",
       "      <td id=\"T_03ae6_row25_col4\" class=\"data row25 col4\" >50%</td>\n",
       "      <td id=\"T_03ae6_row25_col5\" class=\"data row25 col5\" >33%</td>\n",
       "      <td id=\"T_03ae6_row25_col6\" class=\"data row25 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row25_col7\" class=\"data row25 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row25_col8\" class=\"data row25 col8\" >17%</td>\n",
       "      <td id=\"T_03ae6_row25_col9\" class=\"data row25 col9\" >83%</td>\n",
       "      <td id=\"T_03ae6_row25_col10\" class=\"data row25 col10\" >Sales Growth [6]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row26\" class=\"row_heading level0 row26\" >26</th>\n",
       "      <td id=\"T_03ae6_row26_col0\" class=\"data row26 col0\" >Short Sale Constraints</td>\n",
       "      <td id=\"T_03ae6_row26_col1\" class=\"data row26 col1\" >Short Sale Constraints [5]</td>\n",
       "      <td id=\"T_03ae6_row26_col2\" class=\"data row26 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row26_col3\" class=\"data row26 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row26_col4\" class=\"data row26 col4\" >20%</td>\n",
       "      <td id=\"T_03ae6_row26_col5\" class=\"data row26 col5\" >80%</td>\n",
       "      <td id=\"T_03ae6_row26_col6\" class=\"data row26 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row26_col7\" class=\"data row26 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row26_col8\" class=\"data row26 col8\" >20%</td>\n",
       "      <td id=\"T_03ae6_row26_col9\" class=\"data row26 col9\" >80%</td>\n",
       "      <td id=\"T_03ae6_row26_col10\" class=\"data row26 col10\" >Short Sale Constraints [5]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row27\" class=\"row_heading level0 row27\" >27</th>\n",
       "      <td id=\"T_03ae6_row27_col0\" class=\"data row27 col0\" >Short-Term Reversal</td>\n",
       "      <td id=\"T_03ae6_row27_col1\" class=\"data row27 col1\" >Short-Term Reversal [1]</td>\n",
       "      <td id=\"T_03ae6_row27_col2\" class=\"data row27 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row27_col3\" class=\"data row27 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row27_col4\" class=\"data row27 col4\" >100%</td>\n",
       "      <td id=\"T_03ae6_row27_col5\" class=\"data row27 col5\" >0%</td>\n",
       "      <td id=\"T_03ae6_row27_col6\" class=\"data row27 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row27_col7\" class=\"data row27 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row27_col8\" class=\"data row27 col8\" >100%</td>\n",
       "      <td id=\"T_03ae6_row27_col9\" class=\"data row27 col9\" >0%</td>\n",
       "      <td id=\"T_03ae6_row27_col10\" class=\"data row27 col10\" >Short-Term Reversal [1]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row28\" class=\"row_heading level0 row28\" >28</th>\n",
       "      <td id=\"T_03ae6_row28_col0\" class=\"data row28 col0\" >Size</td>\n",
       "      <td id=\"T_03ae6_row28_col1\" class=\"data row28 col1\" >Size [1]</td>\n",
       "      <td id=\"T_03ae6_row28_col2\" class=\"data row28 col2\" >100%</td>\n",
       "      <td id=\"T_03ae6_row28_col3\" class=\"data row28 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row28_col4\" class=\"data row28 col4\" >0%</td>\n",
       "      <td id=\"T_03ae6_row28_col5\" class=\"data row28 col5\" >0%</td>\n",
       "      <td id=\"T_03ae6_row28_col6\" class=\"data row28 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row28_col7\" class=\"data row28 col7\" >100%</td>\n",
       "      <td id=\"T_03ae6_row28_col8\" class=\"data row28 col8\" >0%</td>\n",
       "      <td id=\"T_03ae6_row28_col9\" class=\"data row28 col9\" >0%</td>\n",
       "      <td id=\"T_03ae6_row28_col10\" class=\"data row28 col10\" >Size [1]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row29\" class=\"row_heading level0 row29\" >29</th>\n",
       "      <td id=\"T_03ae6_row29_col0\" class=\"data row29 col0\" >Valuation</td>\n",
       "      <td id=\"T_03ae6_row29_col1\" class=\"data row29 col1\" >Valuation [15]</td>\n",
       "      <td id=\"T_03ae6_row29_col2\" class=\"data row29 col2\" >7%</td>\n",
       "      <td id=\"T_03ae6_row29_col3\" class=\"data row29 col3\" >27%</td>\n",
       "      <td id=\"T_03ae6_row29_col4\" class=\"data row29 col4\" >67%</td>\n",
       "      <td id=\"T_03ae6_row29_col5\" class=\"data row29 col5\" >0%</td>\n",
       "      <td id=\"T_03ae6_row29_col6\" class=\"data row29 col6\" >7%</td>\n",
       "      <td id=\"T_03ae6_row29_col7\" class=\"data row29 col7\" >13%</td>\n",
       "      <td id=\"T_03ae6_row29_col8\" class=\"data row29 col8\" >73%</td>\n",
       "      <td id=\"T_03ae6_row29_col9\" class=\"data row29 col9\" >7%</td>\n",
       "      <td id=\"T_03ae6_row29_col10\" class=\"data row29 col10\" >Valuation [15]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row30\" class=\"row_heading level0 row30\" >30</th>\n",
       "      <td id=\"T_03ae6_row30_col0\" class=\"data row30 col0\" >Volatility</td>\n",
       "      <td id=\"T_03ae6_row30_col1\" class=\"data row30 col1\" >Volatility [5]</td>\n",
       "      <td id=\"T_03ae6_row30_col2\" class=\"data row30 col2\" >0%</td>\n",
       "      <td id=\"T_03ae6_row30_col3\" class=\"data row30 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row30_col4\" class=\"data row30 col4\" >100%</td>\n",
       "      <td id=\"T_03ae6_row30_col5\" class=\"data row30 col5\" >0%</td>\n",
       "      <td id=\"T_03ae6_row30_col6\" class=\"data row30 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row30_col7\" class=\"data row30 col7\" >0%</td>\n",
       "      <td id=\"T_03ae6_row30_col8\" class=\"data row30 col8\" >100%</td>\n",
       "      <td id=\"T_03ae6_row30_col9\" class=\"data row30 col9\" >0%</td>\n",
       "      <td id=\"T_03ae6_row30_col10\" class=\"data row30 col10\" >Volatility [5]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03ae6_level0_row31\" class=\"row_heading level0 row31\" >31</th>\n",
       "      <td id=\"T_03ae6_row31_col0\" class=\"data row31 col0\" >Volume</td>\n",
       "      <td id=\"T_03ae6_row31_col1\" class=\"data row31 col1\" >Volume [5]</td>\n",
       "      <td id=\"T_03ae6_row31_col2\" class=\"data row31 col2\" >20%</td>\n",
       "      <td id=\"T_03ae6_row31_col3\" class=\"data row31 col3\" >0%</td>\n",
       "      <td id=\"T_03ae6_row31_col4\" class=\"data row31 col4\" >80%</td>\n",
       "      <td id=\"T_03ae6_row31_col5\" class=\"data row31 col5\" >0%</td>\n",
       "      <td id=\"T_03ae6_row31_col6\" class=\"data row31 col6\" >0%</td>\n",
       "      <td id=\"T_03ae6_row31_col7\" class=\"data row31 col7\" >20%</td>\n",
       "      <td id=\"T_03ae6_row31_col8\" class=\"data row31 col8\" >60%</td>\n",
       "      <td id=\"T_03ae6_row31_col9\" class=\"data row31 col9\" >20%</td>\n",
       "      <td id=\"T_03ae6_row31_col10\" class=\"data row31 col10\" >Volume [5]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x1788d8c20>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "main2['Cat_Economic']=np.where(main2['Cat_Economic']=='Investment Alt','Investment',main2['Cat_Economic'])\n",
    "main2['Cat_Economic']=np.where(main2['Cat_Economic']=='Profitability Alt','Profitability',main2['Cat_Economic'])\n",
    "table_3_ins=main2.groupby(['Cat_Economic','final_ins'])['description'].count().unstack().fillna(0).reset_index()\n",
    "table_3_ins['n']=table_3_ins[['safe','supremacy','lottery','none']].sum(1)\n",
    "table_3_ins['name']=table_3_ins[['Cat_Economic','n']].apply(lambda x: x[0]+' ['+str(int(x[1]))+']',axis=1)\n",
    "for _var in ['safe','supremacy','lottery','none']:\n",
    "    table_3_ins[_var]=table_3_ins[_var]/table_3_ins['n']\n",
    "table_3_ins=table_3_ins[['Cat_Economic','name','safe','supremacy','lottery','none']]\n",
    "table_3_ins.columns=['Cat_Economic']+[x+'_Analyst Reports' for x in table_3_ins.columns[1:]]\n",
    "\n",
    "\n",
    "table_3_sa=main2.groupby(['Cat_Economic','final_sa'])['description'].count().unstack().fillna(0).reset_index()\n",
    "table_3_sa['n']=table_3_sa[['safe','supremacy','lottery','none']].sum(1)\n",
    "table_3_sa['name']=table_3_sa[['Cat_Economic','n']].apply(lambda x: x[0]+' ['+str(int(x[1]))+']',axis=1)\n",
    "for _var in ['safe','supremacy','lottery','none']:\n",
    "    table_3_sa[_var]=table_3_sa[_var]/table_3_sa['n']\n",
    "table_3_sa=table_3_sa[['Cat_Economic','safe','supremacy','lottery','none','name',]]\n",
    "table_3_sa.columns=['Cat_Economic']+[x+'_SA' for x in table_3_sa.columns[1:]]\n",
    "\n",
    "table_3=pd.merge(table_3_ins,table_3_sa,on=['Cat_Economic'],how='left')\n",
    "table_3.to_excel('results/table_3.xlsx',index=False)\n",
    "table_3.style.format({col: \"{:.0%}\" for col in table_3.select_dtypes(include='float').columns})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1833ec1d",
   "metadata": {},
   "source": [
    "## Table 5A"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "9ec6e34d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:17:30.160484Z",
     "start_time": "2025-06-30T18:17:30.019362Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Input Variables:  ['ins_anomaly_1', 'ins_anomaly_1', 'ins_anomaly_1'] ['safe', 'supremacy', 'lottery'] [('short_leg', 'all_except_short'), ('short_leg', 'all_except_short'), ('short_leg', 'all_except_short')]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>table_name</th>\n",
       "      <th>data</th>\n",
       "      <th>most_consistent</th>\n",
       "      <th>safe</th>\n",
       "      <th>supremacy</th>\n",
       "      <th>lottery</th>\n",
       "      <th>none</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5A</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>0</td>\n",
       "      <td>12%</td>\n",
       "      <td>29%</td>\n",
       "      <td>68%</td>\n",
       "      <td>20%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5A</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>1</td>\n",
       "      <td>8%</td>\n",
       "      <td>17%</td>\n",
       "      <td>55%</td>\n",
       "      <td>20%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5A</td>\n",
       "      <td>SA</td>\n",
       "      <td>0</td>\n",
       "      <td>7%</td>\n",
       "      <td>16%</td>\n",
       "      <td>57%</td>\n",
       "      <td>30%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5A</td>\n",
       "      <td>SA</td>\n",
       "      <td>1</td>\n",
       "      <td>6%</td>\n",
       "      <td>11%</td>\n",
       "      <td>54%</td>\n",
       "      <td>30%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  table_name             data  most_consistent safe supremacy lottery none\n",
       "0   table_5A  Analyst Reports                0  12%       29%     68%  20%\n",
       "1   table_5A  Analyst Reports                1   8%       17%     55%  20%\n",
       "0   table_5A               SA                0   7%       16%     57%  30%\n",
       "1   table_5A               SA                1   6%       11%     54%  30%"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "table_5A=ins_sa_table_func(['ins_anomaly_1','ins_anomaly_1','ins_anomaly_1'], types_list, short_long_pairs, Buy_Sell_list, 'V1', measure=2)\n",
    "table_5A['table_name']='table_5A'\n",
    "show_summary_table([table_5A],'table_5A')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e3811378",
   "metadata": {},
   "source": [
    "## Table 5C"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "85b7b79f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-01T04:46:47.755151Z",
     "start_time": "2025-07-01T04:46:47.629896Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Input Variables:  ['ins_anomaly_3', 'ins_anomaly_3', 'ins_anomaly_3'] ['safe', 'supremacy', 'lottery'] [('short_leg', 'all_except_short'), ('short_leg', 'all_except_short'), ('short_leg', 'all_except_short')]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>table_name</th>\n",
       "      <th>data</th>\n",
       "      <th>most_consistent</th>\n",
       "      <th>safe</th>\n",
       "      <th>supremacy</th>\n",
       "      <th>lottery</th>\n",
       "      <th>none</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5C</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>0</td>\n",
       "      <td>17%</td>\n",
       "      <td>27%</td>\n",
       "      <td>66%</td>\n",
       "      <td>16%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5C</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>1</td>\n",
       "      <td>12%</td>\n",
       "      <td>13%</td>\n",
       "      <td>58%</td>\n",
       "      <td>16%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5C</td>\n",
       "      <td>SA</td>\n",
       "      <td>0</td>\n",
       "      <td>12%</td>\n",
       "      <td>18%</td>\n",
       "      <td>56%</td>\n",
       "      <td>25%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5C</td>\n",
       "      <td>SA</td>\n",
       "      <td>1</td>\n",
       "      <td>10%</td>\n",
       "      <td>11%</td>\n",
       "      <td>53%</td>\n",
       "      <td>25%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  table_name             data  most_consistent safe supremacy lottery none\n",
       "0   table_5C  Analyst Reports                0  17%       27%     66%  16%\n",
       "1   table_5C  Analyst Reports                1  12%       13%     58%  16%\n",
       "0   table_5C               SA                0  12%       18%     56%  25%\n",
       "1   table_5C               SA                1  10%       11%     53%  25%"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "table_5C=ins_sa_table_func(['ins_anomaly_3','ins_anomaly_3','ins_anomaly_3'], types_list, short_long_pairs, Buy_Sell_list, 'V1')\n",
    "table_5C['table_name']='table_5C'\n",
    "show_summary_table([table_5C],'table_5C')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3b511c87",
   "metadata": {},
   "source": [
    "## Table 5E"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "05eab5b5",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:17:30.265379Z",
     "start_time": "2025-06-30T18:17:30.249183Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Citations_top_quartile:  1705.75\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>table_name</th>\n",
       "      <th>data</th>\n",
       "      <th>most_consistent</th>\n",
       "      <th>safe</th>\n",
       "      <th>supremacy</th>\n",
       "      <th>lottery</th>\n",
       "      <th>none</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5E</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>0</td>\n",
       "      <td>13%</td>\n",
       "      <td>34%</td>\n",
       "      <td>70%</td>\n",
       "      <td>13%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5E</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>1</td>\n",
       "      <td>13%</td>\n",
       "      <td>17%</td>\n",
       "      <td>57%</td>\n",
       "      <td>13%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5E</td>\n",
       "      <td>SA</td>\n",
       "      <td>0</td>\n",
       "      <td>6%</td>\n",
       "      <td>21%</td>\n",
       "      <td>64%</td>\n",
       "      <td>21%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5E</td>\n",
       "      <td>SA</td>\n",
       "      <td>1</td>\n",
       "      <td>6%</td>\n",
       "      <td>13%</td>\n",
       "      <td>60%</td>\n",
       "      <td>21%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  table_name             data  most_consistent safe supremacy lottery none\n",
       "0   table_5E  Analyst Reports                0  13%       34%     70%  13%\n",
       "1   table_5E  Analyst Reports                1  13%       17%     57%  13%\n",
       "0   table_5E               SA                0   6%       21%     64%  21%\n",
       "1   table_5E               SA                1   6%       13%     60%  21%"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Citations_top_quartile=table_2A['Citations'].quantile(0.75)\n",
    "print('Citations_top_quartile: ',Citations_top_quartile)\n",
    "table_5E=table_2A[table_2A['Citations']>Citations_top_quartile]\n",
    "table_5E['table_name']='table_5E'\n",
    "show_summary_table([table_5E],'table_5E')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bb8dd8a1",
   "metadata": {},
   "source": [
    "# Table 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "6300ad97",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:18:23.699123Z",
     "start_time": "2025-06-30T18:17:30.266232Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>short_leg</th>\n",
       "      <th>level_0</th>\n",
       "      <th>level_1</th>\n",
       "      <th>description</th>\n",
       "      <th>all_except_short</th>\n",
       "      <th>short_leg</th>\n",
       "      <th>type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>114</th>\n",
       "      <td>safe</td>\n",
       "      <td>count</td>\n",
       "      <td>AOP</td>\n",
       "      <td>4668.00</td>\n",
       "      <td>456.00</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>safe</td>\n",
       "      <td>mean</td>\n",
       "      <td>AOP</td>\n",
       "      <td>0.35</td>\n",
       "      <td>0.39</td>\n",
       "      <td>safe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>171</th>\n",
       "      <td>supremacy</td>\n",
       "      <td>mean</td>\n",
       "      <td>AOP</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.24</td>\n",
       "      <td>supremacy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>285</th>\n",
       "      <td>lottery</td>\n",
       "      <td>mean</td>\n",
       "      <td>AOP</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.37</td>\n",
       "      <td>lottery</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>safe</td>\n",
       "      <td>std</td>\n",
       "      <td>AOP</td>\n",
       "      <td>0.48</td>\n",
       "      <td>0.49</td>\n",
       "      <td>safe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227</th>\n",
       "      <td>supremacy</td>\n",
       "      <td>mean</td>\n",
       "      <td>tang</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.29</td>\n",
       "      <td>supremacy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>341</th>\n",
       "      <td>lottery</td>\n",
       "      <td>mean</td>\n",
       "      <td>tang</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.21</td>\n",
       "      <td>lottery</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>113</th>\n",
       "      <td>safe</td>\n",
       "      <td>std</td>\n",
       "      <td>tang</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.50</td>\n",
       "      <td>safe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>284</th>\n",
       "      <td>supremacy</td>\n",
       "      <td>std</td>\n",
       "      <td>tang</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.45</td>\n",
       "      <td>supremacy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>398</th>\n",
       "      <td>lottery</td>\n",
       "      <td>std</td>\n",
       "      <td>tang</td>\n",
       "      <td>0.47</td>\n",
       "      <td>0.41</td>\n",
       "      <td>lottery</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>399 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "short_leg    level_0 level_1 description  all_except_short  short_leg  \\\n",
       "114             safe   count         AOP           4668.00     456.00   \n",
       "0               safe    mean         AOP              0.35       0.39   \n",
       "171        supremacy    mean         AOP              0.40       0.24   \n",
       "285          lottery    mean         AOP              0.25       0.37   \n",
       "57              safe     std         AOP              0.48       0.49   \n",
       "..               ...     ...         ...               ...        ...   \n",
       "227        supremacy    mean        tang              0.43       0.29   \n",
       "341          lottery    mean        tang              0.34       0.21   \n",
       "113             safe     std        tang              0.42       0.50   \n",
       "284        supremacy     std        tang              0.50       0.45   \n",
       "398          lottery     std        tang              0.47       0.41   \n",
       "\n",
       "short_leg       type  \n",
       "114                n  \n",
       "0               safe  \n",
       "171        supremacy  \n",
       "285          lottery  \n",
       "57              safe  \n",
       "..               ...  \n",
       "227        supremacy  \n",
       "341          lottery  \n",
       "113             safe  \n",
       "284        supremacy  \n",
       "398          lottery  \n",
       "\n",
       "[399 rows x 6 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "short_long_leg=pd.read_csv('data/signalbase_table4.csv')\n",
    "safe_supremacy_lottery=['safe','supremacy','lottery']\n",
    "order_dict=dict(zip(safe_supremacy_lottery,range(len(safe_supremacy_lottery))))\n",
    "agg_str={x:['mean','std'] for x in safe_supremacy_lottery}\n",
    "agg_str.update({'safe':['mean','std','count']})\n",
    "keep_vars=['description', 'all_except_short','short_leg','type','level_1']\n",
    "keep_vars2=['description', 'all_except_long','long_leg','type','level_1'] \n",
    "folder='V1/survey/'  \n",
    "for ins_retail in ['retail','ins']:\n",
    "    df=pd.read_csv('private/survey/full_freq_'+ins_retail+'.csv', usecols=['permno', 'yyyymm']+safe_supremacy_lottery)\n",
    "    df1=pd.merge(df, short_long_leg,on=['permno','yyyymm'],how='inner')\n",
    "    df1=df1[df1['short_leg'].notnull()].reset_index(drop=True)\n",
    "    df1['nunique_yyyymm']=df1.groupby(['permno','Anomaly'])['yyyymm'].transform('nunique')\n",
    "    df1=df1[df1['nunique_yyyymm']==12].reset_index(drop=True)\n",
    "    df1=df1.sort_values(['permno','Anomaly','yyyymm']).reset_index(drop=True)\n",
    "    df1['short_leg']=df1.groupby(['permno','Anomaly'])['short_leg'].transform('max')\n",
    "    df2=df1.groupby(['Anomaly','short_leg']).agg(agg_str).T.stack(level=0).reset_index()\n",
    "    df2['type']=df2['level_0']\n",
    "    df2.loc[df2['level_1']=='count', 'type'] ='n'\n",
    "    df2=df2.sort_values(['Anomaly','level_1']).rename(columns={'Anomaly':'description',0:'all_except_short',1:'short_leg'})\n",
    "    save_root=folder+'Buy'\n",
    "    if not os.path.exists(save_root):\n",
    "        os.makedirs(save_root)  \n",
    "    df2.to_csv(save_root+'/'+ins_retail.replace('retail','sa')+'_anomaly.csv',index=False) \n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "02f8a321",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:18:23.778151Z",
     "start_time": "2025-06-30T18:18:23.702465Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Input Variables:  ['ins_anomaly', 'ins_anomaly', 'ins_anomaly'] ['safe', 'supremacy', 'lottery'] [('short_leg', 'all_except_short'), ('short_leg', 'all_except_short'), ('short_leg', 'all_except_short')]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>table_name</th>\n",
       "      <th>data</th>\n",
       "      <th>most_consistent</th>\n",
       "      <th>safe</th>\n",
       "      <th>supremacy</th>\n",
       "      <th>lottery</th>\n",
       "      <th>none</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_4</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>0</td>\n",
       "      <td>19%</td>\n",
       "      <td>32%</td>\n",
       "      <td>70%</td>\n",
       "      <td>6%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_4</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>1</td>\n",
       "      <td>17%</td>\n",
       "      <td>13%</td>\n",
       "      <td>64%</td>\n",
       "      <td>6%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_4</td>\n",
       "      <td>SA</td>\n",
       "      <td>0</td>\n",
       "      <td>8%</td>\n",
       "      <td>27%</td>\n",
       "      <td>88%</td>\n",
       "      <td>2%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_4</td>\n",
       "      <td>SA</td>\n",
       "      <td>1</td>\n",
       "      <td>6%</td>\n",
       "      <td>10%</td>\n",
       "      <td>81%</td>\n",
       "      <td>2%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  table_name             data  most_consistent safe supremacy lottery none\n",
       "0    table_4  Analyst Reports                0  19%       32%     70%   6%\n",
       "1    table_4  Analyst Reports                1  17%       13%     64%   6%\n",
       "0    table_4               SA                0   8%       27%     88%   2%\n",
       "1    table_4               SA                1   6%       10%     81%   2%"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "table_4=ins_sa_table_func(['ins_anomaly','ins_anomaly','ins_anomaly'], types_list, short_long_pairs, Buy_Sell_list, 'V1/survey/')\n",
    "table_4['table_name']='table_4'\n",
    "show_summary_table([table_4],'table_4')  #Here the 'SA' refers to 'retail'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "9e93f250",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:18:23.784749Z",
     "start_time": "2025-06-30T18:18:23.779217Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.6521739130434783\n",
      "0.9117647058823529\n"
     ]
    }
   ],
   "source": [
    "compare_ins=pd.merge(table_2A[table_2A['final_ins'].isin(['safe','supremacy','lottery'])][['description','final_ins']],\n",
    "                 table_4[table_4['final_ins'].isin(['safe','supremacy','lottery'])][['description','final_ins']],on='description',how='inner')\n",
    "print(len(compare_ins[compare_ins['final_ins_x']==compare_ins['final_ins_y']])/len(compare_ins))\n",
    "      \n",
    "compare_sa=pd.merge(table_2A[table_2A['final_sa'].isin(['safe','supremacy','lottery'])][['description','final_sa']],\n",
    "                 table_4[table_4['final_sa'].isin(['safe','supremacy','lottery'])][['description','final_sa']],on='description',how='inner')\n",
    "print(len(compare_sa[compare_sa['final_sa_x']==compare_sa['final_sa_y']])/len(compare_sa))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e346e9e7",
   "metadata": {},
   "source": [
    "# Table 5B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "5e1d3931",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:19:19.234512Z",
     "start_time": "2025-06-30T18:18:23.785612Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Input Variables:  ['ins_anomaly_1', 'ins_anomaly_1', 'ins_anomaly_1'] ['safe', 'supremacy', 'lottery'] [('short_leg', 'all_except_short'), ('short_leg', 'all_except_short'), ('short_leg', 'all_except_short')]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>table_name</th>\n",
       "      <th>data</th>\n",
       "      <th>most_consistent</th>\n",
       "      <th>safe</th>\n",
       "      <th>supremacy</th>\n",
       "      <th>lottery</th>\n",
       "      <th>none</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5B</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>0</td>\n",
       "      <td>14%</td>\n",
       "      <td>31%</td>\n",
       "      <td>65%</td>\n",
       "      <td>24%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5B</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>1</td>\n",
       "      <td>9%</td>\n",
       "      <td>18%</td>\n",
       "      <td>49%</td>\n",
       "      <td>24%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5B</td>\n",
       "      <td>SA</td>\n",
       "      <td>0</td>\n",
       "      <td>6%</td>\n",
       "      <td>21%</td>\n",
       "      <td>59%</td>\n",
       "      <td>31%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5B</td>\n",
       "      <td>SA</td>\n",
       "      <td>1</td>\n",
       "      <td>4%</td>\n",
       "      <td>10%</td>\n",
       "      <td>54%</td>\n",
       "      <td>31%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  table_name             data  most_consistent safe supremacy lottery none\n",
       "0   table_5B  Analyst Reports                0  14%       31%     65%  24%\n",
       "1   table_5B  Analyst Reports                1   9%       18%     49%  24%\n",
       "0   table_5B               SA                0   6%       21%     59%  31%\n",
       "1   table_5B               SA                1   4%       10%     54%  31%"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keep_vars=['description','all_except_short','short_leg','type','level_1']\n",
    "keep_vars2=['description','all_except_long','long_leg','type','level_1']\n",
    "safe_supremacy_lottery=['safe_1','supremacy_1','lottery_1']\n",
    "agg_str={x:['mean','std'] for x in safe_supremacy_lottery}\n",
    "agg_str.update({'safe_1':['mean','std','count']})\n",
    "\n",
    "sample='Buy'\n",
    "for SA_ins in ['SA','ins']:\n",
    "    permno_yymm_leg=pd.read_csv('data/permno_yymm_leg_'+SA_ins+'.csv')\n",
    "    df=pd.read_csv('private/'+SA_ins+'/full_freq_raw.csv',usecols=['id','permno', 'yyyymm','word_length','BUYSELL']+safe_supremacy_lottery)\n",
    "    df=pd.merge(df, permno_yymm_leg,on=['permno','yyyymm'],how='inner')\n",
    "    df2=df[df['BUYSELL']==1].groupby(['Anomaly','short_leg']).agg(agg_str).T.stack(level=0).reset_index()\n",
    "    df2[['type','k123']]=df2['level_0'].str.rsplit('_', n=1, expand=True) \n",
    "    df2.loc[df2['level_1']=='count', 'type'] ='n'\n",
    "    df2=df2.sort_values(['k123','Anomaly','level_1']).rename(columns={'Anomaly':'description', 0:'all_except_short',1:'short_leg'})\n",
    "\n",
    "    save_root='V1/raw/'+sample+'/'\n",
    "    if not os.path.exists(save_root):\n",
    "        os.makedirs(save_root)  \n",
    "    df2[keep_vars].to_csv(save_root+SA_ins.lower()+'_anomaly_1.csv',index=False)\n",
    "\n",
    "table_5B=ins_sa_table_func(ins_sa_doc, types_list, short_long_pairs, Buy_Sell_list, 'V1/raw/')\n",
    "table_5B['table_name']='table_5B'\n",
    "show_summary_table([table_5B],'table_5B')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fae42381",
   "metadata": {},
   "source": [
    "# Table 5D"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "66f7a98b",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:20:29.107447Z",
     "start_time": "2025-06-30T18:19:19.235813Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Input Variables:  ['ins_anomaly_bert1', 'ins_anomaly_bert1', 'ins_anomaly_bert1'] ['safe', 'supremacy', 'lottery'] [('short_leg', 'all_except_short'), ('short_leg', 'all_except_short'), ('short_leg', 'all_except_short')]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>table_name</th>\n",
       "      <th>data</th>\n",
       "      <th>most_consistent</th>\n",
       "      <th>safe</th>\n",
       "      <th>supremacy</th>\n",
       "      <th>lottery</th>\n",
       "      <th>none</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5D</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>0</td>\n",
       "      <td>18%</td>\n",
       "      <td>26%</td>\n",
       "      <td>68%</td>\n",
       "      <td>12%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5D</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>1</td>\n",
       "      <td>16%</td>\n",
       "      <td>10%</td>\n",
       "      <td>62%</td>\n",
       "      <td>12%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5D</td>\n",
       "      <td>SA</td>\n",
       "      <td>0</td>\n",
       "      <td>13%</td>\n",
       "      <td>16%</td>\n",
       "      <td>61%</td>\n",
       "      <td>21%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5D</td>\n",
       "      <td>SA</td>\n",
       "      <td>1</td>\n",
       "      <td>10%</td>\n",
       "      <td>10%</td>\n",
       "      <td>59%</td>\n",
       "      <td>21%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  table_name             data  most_consistent safe supremacy lottery none\n",
       "0   table_5D  Analyst Reports                0  18%       26%     68%  12%\n",
       "1   table_5D  Analyst Reports                1  16%       10%     62%  12%\n",
       "0   table_5D               SA                0  13%       16%     61%  21%\n",
       "1   table_5D               SA                1  10%       10%     59%  21%"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keep_vars=['description','all_except_short','short_leg','type','level_1']\n",
    "keep_vars2=['description','all_except_long','long_leg','type','level_1']\n",
    "safe_supremacy_lottery=['safe_bert1','supremacy_bert1','lottery_bert1']\n",
    "agg_str={x:['mean','std'] for x in safe_supremacy_lottery}\n",
    "agg_str.update({'safe_bert1':['mean','std','count']})\n",
    "\n",
    "folder='V1/'\n",
    "sample='Buy'\n",
    "for SA_ins in ['SA','ins']:\n",
    "    df=pd.read_csv('private/'+SA_ins+'/bert_freq.csv',usecols=['id','permno', 'yyyymm','word_length','BUYSELL']+safe_supremacy_lottery)\n",
    "    permno_yymm_leg=pd.read_csv('data/permno_yymm_leg_'+SA_ins+'.csv')\n",
    "    df=pd.merge(df, permno_yymm_leg,on=['permno','yyyymm'],how='inner')\n",
    "\n",
    "    df2=df[df['BUYSELL']==1].groupby(['Anomaly','short_leg']).agg(agg_str).T.stack(level=0).reset_index()\n",
    "    df2[['type','k123']]=df2['level_0'].str.rsplit('_', n=1, expand=True) \n",
    "    df2.loc[df2['level_1']=='count', 'type'] ='n'\n",
    "    df2=df2.sort_values(['k123','Anomaly','level_1']).rename(columns={'Anomaly':'description', 0:'all_except_short',1:'short_leg'})\n",
    "    \n",
    "    save_root=folder+sample+'/'\n",
    "    if not os.path.exists(save_root):\n",
    "        os.makedirs(save_root)  \n",
    "    df2[keep_vars].to_csv(save_root+SA_ins.lower()+'_anomaly_bert1.csv',index=False)\n",
    "    \n",
    "    df3=df[df['BUYSELL']==1].groupby(['Anomaly','long_leg']).agg(agg_str).T.stack(level=0).reset_index()\n",
    "    df3[['type','k123']]=df3['level_0'].str.rsplit('_', n=1, expand=True) \n",
    "    df3.loc[df3['level_1']=='count', 'type'] ='n'\n",
    "    df3=df3.sort_values(['k123','Anomaly','level_1']).rename(columns={'Anomaly':'description',0:'all_except_long',1:'long_leg'})\n",
    "    df3[keep_vars2].to_csv(save_root+SA_ins.lower()+'_anomaly2_bert1.csv',index=False)\n",
    "\n",
    "table_5D=ins_sa_table_func(['ins_anomaly_bert1','ins_anomaly_bert1','ins_anomaly_bert1'], types_list, short_long_pairs, Buy_Sell_list, 'V1')\n",
    "table_5D['table_name']='table_5D'\n",
    "show_summary_table([table_5D],'table_5D')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "65e27595",
   "metadata": {},
   "source": [
    "# Tables 5F"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "5e33122f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:21:26.614446Z",
     "start_time": "2025-06-30T18:20:29.108679Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Input Variables:  ['ins_anomaly_1', 'ins_anomaly_1', 'ins_anomaly_1'] ['safe', 'supremacy', 'lottery'] [('short_leg', 'all_except_short'), ('short_leg', 'all_except_short'), ('short_leg', 'all_except_short')]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>table_name</th>\n",
       "      <th>data</th>\n",
       "      <th>most_consistent</th>\n",
       "      <th>safe</th>\n",
       "      <th>supremacy</th>\n",
       "      <th>lottery</th>\n",
       "      <th>none</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5F</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>0</td>\n",
       "      <td>10%</td>\n",
       "      <td>28%</td>\n",
       "      <td>68%</td>\n",
       "      <td>19%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5F</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>1</td>\n",
       "      <td>8%</td>\n",
       "      <td>18%</td>\n",
       "      <td>54%</td>\n",
       "      <td>19%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5F</td>\n",
       "      <td>SA</td>\n",
       "      <td>0</td>\n",
       "      <td>8%</td>\n",
       "      <td>16%</td>\n",
       "      <td>56%</td>\n",
       "      <td>30%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5F</td>\n",
       "      <td>SA</td>\n",
       "      <td>1</td>\n",
       "      <td>6%</td>\n",
       "      <td>11%</td>\n",
       "      <td>53%</td>\n",
       "      <td>30%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  table_name             data  most_consistent safe supremacy lottery none\n",
       "0   table_5F  Analyst Reports                0  10%       28%     68%  19%\n",
       "1   table_5F  Analyst Reports                1   8%       18%     54%  19%\n",
       "0   table_5F               SA                0   8%       16%     56%  30%\n",
       "1   table_5F               SA                1   6%       11%     53%  30%"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keep_vars=['description','all_except_short','short_leg','type','level_1']\n",
    "keep_vars2=['description','all_except_long','long_leg','type','level_1']\n",
    "safe_supremacy_lottery=['safe_1','supremacy_1','lottery_1']\n",
    "agg_str={x:['mean','std'] for x in safe_supremacy_lottery}\n",
    "agg_str.update({'safe_1':['mean','std','count']})\n",
    "\n",
    "SignalDoc=pd.read_csv('data/SignalDoc2.csv')[['Acronym','Predictability_in_OP','LongDescription','Year']]\n",
    "SignalDoc=SignalDoc[SignalDoc['Predictability_in_OP'].isin(['1_clear','2_likely'])].rename(\n",
    "                    columns={'Acronym':'Anomaly','LongDescription':'Description'}).drop('Predictability_in_OP',axis=1)\n",
    "SignalDoc['first_after_yyyymm']=(SignalDoc['Year']+1)*100+1\n",
    "\n",
    "sample='Buy'\n",
    "for SA_ins in ['SA','ins']:\n",
    "    df=pd.read_csv('private/'+SA_ins+'/full_freq.csv',usecols=['id','permno', 'yyyymm','word_length','BUYSELL']+safe_supremacy_lottery)\n",
    "    permno_yymm_leg=pd.read_csv('data/permno_yymm_leg_'+SA_ins+'.csv')\n",
    "    permno_yymm_leg=pd.merge(permno_yymm_leg,SignalDoc[['Anomaly','first_after_yyyymm']],on='Anomaly',how='left')\n",
    "    permno_yymm_leg=permno_yymm_leg[permno_yymm_leg['yyyymm']>=permno_yymm_leg['first_after_yyyymm']].drop('first_after_yyyymm',axis=1)\n",
    "    df1=pd.merge(df, permno_yymm_leg,on=['permno','yyyymm'],how='inner')\n",
    "\n",
    "    df2=df1[df1['BUYSELL']==1].groupby(['Anomaly','short_leg']).agg(agg_str).T.stack(level=0).reset_index()\n",
    "    df2[['type','k123']]=df2['level_0'].str.rsplit('_', n=1, expand=True) \n",
    "    df2.loc[df2['level_1']=='count', 'type'] ='n'\n",
    "    df2=df2.sort_values(['k123','Anomaly','level_1']).rename(columns={'Anomaly':'description', 0:'all_except_short',1:'short_leg'})\n",
    "\n",
    "    folder='V1/after/'\n",
    "    save_root=folder+sample+'/'\n",
    "    if not os.path.exists(save_root):\n",
    "        os.makedirs(save_root)  \n",
    "    df2[keep_vars].to_csv(save_root+SA_ins.lower()+'_anomaly_1.csv',index=False)\n",
    "\n",
    "table_5F=ins_sa_table_func(ins_sa_doc, types_list, short_long_pairs, Buy_Sell_list, 'V1/after')\n",
    "table_5F['table_name']='table_5F'\n",
    "show_summary_table([table_5F],'table_5F')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b28a7cf7",
   "metadata": {},
   "source": [
    "# Tables 5G"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "b5e82cb7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-30T18:21:50.565820Z",
     "start_time": "2025-06-30T18:21:26.615640Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "204 51\n",
      "204 51\n",
      "Input Variables:  ['ins_anomaly_1', 'ins_anomaly_1', 'ins_anomaly_1'] ['safe', 'supremacy', 'lottery'] [('short_leg', 'all_except_short'), ('short_leg', 'all_except_short'), ('short_leg', 'all_except_short')]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>table_name</th>\n",
       "      <th>data</th>\n",
       "      <th>most_consistent</th>\n",
       "      <th>safe</th>\n",
       "      <th>supremacy</th>\n",
       "      <th>lottery</th>\n",
       "      <th>none</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5G</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>0</td>\n",
       "      <td>13%</td>\n",
       "      <td>33%</td>\n",
       "      <td>73%</td>\n",
       "      <td>16%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5G</td>\n",
       "      <td>Analyst Reports</td>\n",
       "      <td>1</td>\n",
       "      <td>11%</td>\n",
       "      <td>13%</td>\n",
       "      <td>60%</td>\n",
       "      <td>16%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table_5G</td>\n",
       "      <td>SA</td>\n",
       "      <td>0</td>\n",
       "      <td>9%</td>\n",
       "      <td>20%</td>\n",
       "      <td>64%</td>\n",
       "      <td>20%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table_5G</td>\n",
       "      <td>SA</td>\n",
       "      <td>1</td>\n",
       "      <td>7%</td>\n",
       "      <td>11%</td>\n",
       "      <td>62%</td>\n",
       "      <td>20%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  table_name             data  most_consistent safe supremacy lottery none\n",
       "0   table_5G  Analyst Reports                0  13%       33%     73%  16%\n",
       "1   table_5G  Analyst Reports                1  11%       13%     60%  16%\n",
       "0   table_5G               SA                0   9%       20%     64%  20%\n",
       "1   table_5G               SA                1   7%       11%     62%  20%"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keep_vars=['description','all_except_short','short_leg','type','level_1']\n",
    "keep_vars2=['description','all_except_long','long_leg','type','level_1']\n",
    "safe_supremacy_lottery=['safe_1','supremacy_1','lottery_1']\n",
    "agg_str={x:['mean','std'] for x in safe_supremacy_lottery}\n",
    "agg_str.update({'safe_1':['mean','std','count']})\n",
    "\n",
    "sample='Buy'\n",
    "for SA_ins in ['SA','ins']:\n",
    "    df=pd.read_csv('private/'+SA_ins+'/full_freq.csv',usecols=['id','permno', 'yyyymm','word_length','BUYSELL']+safe_supremacy_lottery)\n",
    "    permno_yymm_leg=pd.read_csv('data/permno_yymm_leg_'+SA_ins+'.csv')\n",
    "    Anomaly_types=permno_yymm_leg['Anomaly'].unique().tolist()\n",
    "    SignalDoc2=SignalDoc[SignalDoc['Anomaly'].isin(Anomaly_types)]\n",
    "    SignalDoc2['early_quartile']=pd.qcut(SignalDoc2['Year'], 4,labels=range(1,5))\n",
    "    old_anomalies=SignalDoc2[SignalDoc2['early_quartile']==1]['Anomaly'].tolist()\n",
    "    print(len(Anomaly_types),len(old_anomalies))\n",
    "    permno_yymm_leg=permno_yymm_leg[permno_yymm_leg['Anomaly'].isin(old_anomalies)]\n",
    "    df1=pd.merge(df, permno_yymm_leg,on=['permno','yyyymm'],how='inner')\n",
    "\n",
    "    df2=df1[df1['BUYSELL']==1].groupby(['Anomaly','short_leg']).agg(agg_str).T.stack(level=0).reset_index()\n",
    "    df2[['type','k123']]=df2['level_0'].str.rsplit('_', n=1, expand=True) \n",
    "    df2.loc[df2['level_1']=='count', 'type'] ='n'\n",
    "    df2=df2.sort_values(['k123','Anomaly','level_1']).rename(columns={'Anomaly':'description', 0:'all_except_short',1:'short_leg'})\n",
    "\n",
    "    folder='V1/before/'\n",
    "    save_root=folder+sample+'/'\n",
    "    if not os.path.exists(save_root):\n",
    "        os.makedirs(save_root)  \n",
    "    df2[keep_vars].to_csv(save_root+SA_ins.lower()+'_anomaly_1.csv',index=False)\n",
    "\n",
    "table_5G=ins_sa_table_func(ins_sa_doc, types_list, short_long_pairs, Buy_Sell_list, 'V1/before')\n",
    "table_5G['table_name']='table_5G'\n",
    "show_summary_table([table_5G],'table_5G')"
   ]
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "280.486px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
